diff --git a/.gitignore b/.gitignore index ae39cda..2ad489c 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ Cargo.lock .criterion perf.data* +/tmp diff --git a/Cargo.toml b/Cargo.toml index 9d00247..0a3ca71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,13 +24,15 @@ travis-ci = { repository = "jonhoo/hdrsample" } [features] bench_private = [] # for enabling nightly-only feature(test) on the main crate to allow benchmarking private code -serialization = [ "flate2" ] +serialization = [ "flate2", "nom", "base64" ] default = [ "serialization" ] [dependencies] num-traits = "0.1" byteorder = "1.0.0" flate2 = { version = "0.2.17", optional = true } +nom = { version = "^3.2.0", optional = true } +base64 = { version = "0.7.0", optional = true } [dev-dependencies] rand = "0.3.15" diff --git a/benches/interval_log.rs b/benches/interval_log.rs new file mode 100644 index 0000000..2984e80 --- /dev/null +++ b/benches/interval_log.rs @@ -0,0 +1,88 @@ +#![feature(test)] + +extern crate hdrsample; +extern crate rand; +extern crate test; + +use std::time; + +use hdrsample::*; +use hdrsample::serialization; +use hdrsample::serialization::interval_log; +use test::Bencher; + +use self::rand_varint::*; + +#[path = "../src/serialization/rand_varint.rs"] +mod rand_varint; + +#[bench] +fn write_interval_log_1k_hist_10k_value(b: &mut Bencher) { + let mut log = Vec::new(); + let mut histograms = Vec::new(); + let mut rng = rand::weak_rng(); + + for _ in 0..1000 { + let mut h = Histogram::::new_with_bounds(1, u64::max_value(), 3).unwrap(); + + for v in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) { + h.record(v).unwrap(); + } + + histograms.push(h); + } + + let mut serializer = serialization::V2Serializer::new(); + + b.iter(|| { + log.clear(); + + let mut writer = interval_log::IntervalLogWriterBuilder::new() + .begin_log_with(&mut log, &mut serializer) + .unwrap(); + + let dur = time::Duration::new(5, 678_000_000); + for h in histograms.iter() { + writer + 
.write_histogram(h, time::Duration::new(1, 234_000_000), dur, None) + .unwrap(); + } + }) +} + +#[bench] +fn parse_interval_log_1k_hist_10k_value(b: &mut Bencher) { + let mut log = Vec::new(); + let mut histograms = Vec::new(); + let mut rng = rand::weak_rng(); + + for _ in 0..1000 { + let mut h = Histogram::::new_with_bounds(1, u64::max_value(), 3).unwrap(); + + for v in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) { + h.record(v).unwrap(); + } + + histograms.push(h); + } + + { + let mut serializer = serialization::V2Serializer::new(); + let mut writer = interval_log::IntervalLogWriterBuilder::new() + .begin_log_with(&mut log, &mut serializer) + .unwrap(); + + let dur = time::Duration::new(5, 678_000_000); + for h in histograms.iter() { + writer + .write_histogram(h, time::Duration::new(1, 234_000_000), dur, None) + .unwrap(); + } + } + + b.iter(|| { + let iter = interval_log::IntervalLogIterator::new(&log); + + assert_eq!(1000, iter.count()); + }) +} diff --git a/benches/record.rs b/benches/record.rs index 8289f21..128ba63 100644 --- a/benches/record.rs +++ b/benches/record.rs @@ -5,22 +5,24 @@ extern crate rand; extern crate test; use hdrsample::*; -use self::rand::Rng; use self::test::Bencher; +use self::rand_varint::*; + +#[path = "../src/serialization/rand_varint.rs"] +mod rand_varint; + #[bench] fn record_precalc_random_values_with_1_count_u64(b: &mut Bencher) { let mut h = Histogram::::new_with_bounds(1, u64::max_value(), 3).unwrap(); let mut indices = Vec::::new(); - // TODO improve this and similar benchmarks to use a non-uniform distribution (like that used - // in serialization tests) so we're not always recording in the top few buckets let mut rng = rand::weak_rng(); // same value approach as record_precalc_random_values_with_max_count_u64 so that they are // comparable - for _ in 0..1000_000 { - indices.push(rng.gen()); + for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) { + indices.push(v); } b.iter(|| { @@ -39,10 
+41,9 @@ fn record_precalc_random_values_with_max_count_u64(b: &mut Bencher) { // store values in an array and re-use so we can be sure to hit the overflow case - for _ in 0..1000_000 { - let r = rng.gen(); - indices.push(r); - h.record_n(r, u64::max_value()).unwrap(); + for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) { + indices.push(v); + h.record_n(v, u64::max_value()).unwrap(); } b.iter(|| { @@ -59,8 +60,8 @@ fn record_correct_precalc_random_values_with_1_count_u64(b: &mut Bencher) { let mut indices = Vec::::new(); let mut rng = rand::weak_rng(); - for _ in 0..10_000 { - indices.push(rng.gen()); + for v in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) { + indices.push(v); } b.iter(|| { @@ -79,8 +80,10 @@ fn record_random_values_with_1_count_u64(b: &mut Bencher) { // This should be *slower* than the benchmarks above where we pre-calculate the values // outside of the hot loop. If it isn't, then those measurements are likely spurious. - b.iter(|| for _ in 0..1000_000 { - h.record(rng.gen()).unwrap() + b.iter(|| { + for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) { + h.record(v).unwrap() + } }) } @@ -136,11 +139,10 @@ fn do_subtract_benchmark Histogram>( for _ in 0..1000 { let mut h = addend_factory(); - for _ in 0..1000 { - let r = rng.gen(); - h.record_n(r, count_at_each_addend_value).unwrap(); + for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000) { + h.record_n(v, count_at_each_addend_value).unwrap(); // ensure there's a count to subtract from - accum.record_n(r, count_at_each_addend_value).unwrap(); + accum.record_n(v, count_at_each_addend_value).unwrap(); } subtrahends.push(h); @@ -166,15 +168,16 @@ fn do_add_benchmark Histogram>( for _ in 0..1000 { let mut h = addend_factory(); - for _ in 0..1000 { - let r = rng.gen(); - h.record_n(r, count_at_each_addend_value).unwrap(); + for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000) { + h.record_n(v, 
count_at_each_addend_value).unwrap(); } addends.push(h); } - b.iter(|| for h in addends.iter() { - accum.add(h).unwrap(); + b.iter(|| { + for h in addends.iter() { + accum.add(h).unwrap(); + } }) } diff --git a/benches/serialization.rs b/benches/serialization.rs index 15a356c..6c89fe1 100644 --- a/benches/serialization.rs +++ b/benches/serialization.rs @@ -6,11 +6,13 @@ extern crate test; use hdrsample::*; use hdrsample::serialization::*; -use self::rand::distributions::range::Range; -use self::rand::distributions::IndependentSample; use self::test::Bencher; -use std::io::{Cursor, Write}; -use std::fmt::Debug; +use std::io::Cursor; + +use self::rand_varint::*; + +#[path = "../src/serialization/rand_varint.rs"] +mod rand_varint; #[bench] fn serialize_tiny_dense_v2(b: &mut Bencher) { @@ -168,17 +170,18 @@ fn do_serialize_bench( digits: u8, fraction_of_counts_len: f64, ) where - S: TestOnlyHypotheticalSerializerInterface, + S: Serializer, { let mut h = Histogram::::new_with_bounds(low, high, digits).unwrap(); let random_counts = (fraction_of_counts_len * h.distinct_values() as f64) as usize; let mut vec = Vec::with_capacity(random_counts); - let range = Range::new(low, high); - let mut rng = rand::weak_rng(); - for _ in 0..random_counts { - h.record(range.ind_sample(&mut rng)).unwrap(); + for v in RandomVarintEncodedLengthIter::new(&mut rng) + .filter(|v| v >= &low && v <= &high) + .take(random_counts) + { + h.record(v).unwrap(); } b.iter(|| { @@ -196,17 +199,18 @@ fn do_deserialize_bench( digits: u8, fraction_of_counts_len: f64, ) where - S: TestOnlyHypotheticalSerializerInterface, + S: Serializer, { let mut h = Histogram::::new_with_bounds(low, high, digits).unwrap(); let random_counts = (fraction_of_counts_len * h.distinct_values() as f64) as usize; let mut vec = Vec::with_capacity(random_counts); - let range = Range::new(low, high); - let mut rng = rand::weak_rng(); - for _ in 0..random_counts { - h.record(range.ind_sample(&mut rng)).unwrap(); + for v in 
RandomVarintEncodedLengthIter::new(&mut rng) + .filter(|v| v >= &low && v <= &high) + .take(random_counts) + { + h.record(v).unwrap(); } let _ = s.serialize(&h, &mut vec).unwrap(); @@ -217,5 +221,3 @@ fn do_deserialize_bench( let _: Histogram = d.deserialize(&mut cursor).unwrap(); }); } - -include!("../src/serialization/test_serialize_trait.rs"); diff --git a/examples/cli.rs b/examples/cli.rs index ca129ce..d92bc43 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -9,8 +9,9 @@ use std::fmt::Display; use clap::{App, Arg, SubCommand}; use hdrsample::{Histogram, RecordError}; -use hdrsample::serialization::{DeserializeError, Deserializer, V2DeflateSerializeError, - V2DeflateSerializer, V2SerializeError, V2Serializer}; +use hdrsample::serialization::{DeserializeError, Deserializer, Serializer, + V2DeflateSerializeError, V2DeflateSerializer, V2SerializeError, + V2Serializer}; fn main() { let default_max = format!("{}", u64::max_value()); diff --git a/src/errors/mod.rs b/src/errors/mod.rs index 38555f4..04b0c5c 100644 --- a/src/errors/mod.rs +++ b/src/errors/mod.rs @@ -1,4 +1,4 @@ -//! Errors types used throughout this library +//! Error types used throughout this library /// Errors that can occur when creating a histogram. 
#[derive(Debug, Eq, PartialEq, Clone, Copy)] diff --git a/src/iterators/linear.rs b/src/iterators/linear.rs index efa5f50..b433c3d 100644 --- a/src/iterators/linear.rs +++ b/src/iterators/linear.rs @@ -22,6 +22,8 @@ impl<'a, T: 'a + Counter> Iter<'a, T> { value_units_per_bucket > 0, "value_units_per_bucket must be > 0" ); + + let new_lowest = hist.lowest_equivalent(value_units_per_bucket - 1); HistogramIterator::new( hist, Iter { @@ -29,9 +31,7 @@ impl<'a, T: 'a + Counter> Iter<'a, T> { value_units_per_bucket, // won't underflow because value_units_per_bucket > 0 current_step_highest_value_reporting_level: value_units_per_bucket - 1, - current_step_lowest_value_reporting_level: hist.lowest_equivalent( - value_units_per_bucket - 1, - ), + current_step_lowest_value_reporting_level: new_lowest, }, ) } diff --git a/src/iterators/log.rs b/src/iterators/log.rs index 543d6b6..42a226c 100644 --- a/src/iterators/log.rs +++ b/src/iterators/log.rs @@ -27,6 +27,8 @@ impl<'a, T: 'a + Counter> Iter<'a, T> { "value_units_per_bucket must be > 0" ); assert!(log_base > 1.0, "log_base must be > 1.0"); + + let new_lowest = hist.lowest_equivalent(value_units_in_first_bucket - 1); HistogramIterator::new( hist, Iter { @@ -34,9 +36,7 @@ impl<'a, T: 'a + Counter> Iter<'a, T> { log_base, next_value_reporting_level: value_units_in_first_bucket as f64, current_step_highest_value_reporting_level: value_units_in_first_bucket - 1, - current_step_lowest_value_reporting_level: hist.lowest_equivalent( - value_units_in_first_bucket - 1, - ), + current_step_lowest_value_reporting_level: new_lowest, }, ) } diff --git a/src/lib.rs b/src/lib.rs index f7d6570..25a8d59 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,7 +94,7 @@ //! use hdrsample::Histogram; //! let hist = Histogram::::new(2).unwrap(); //! // ... -//! println!("# of samples: {}", hist.count()); +//! println!("# of samples: {}", hist.len()); //! println!("99.9'th percentile: {}", hist.value_at_quantile(0.999)); //! ``` //! 
@@ -170,7 +170,6 @@ //! - `DoubleHistogram`. //! - The `Recorder` feature of HdrHistogram. //! - Value shifting ("normalization"). -//! - Timestamps and tags. //! - Textual output methods. These seem almost orthogonal to HdrSample, though it might be //! convenient if we implemented some relevant traits (CSV, JSON, and possibly simple //! `fmt::Display`). @@ -189,6 +188,10 @@ extern crate num_traits as num; +#[cfg(feature = "serialization")] +#[macro_use] +extern crate nom; + use std::borrow::Borrow; use std::cmp; use std::ops::{AddAssign, SubAssign}; @@ -777,7 +780,8 @@ impl Histogram { }; // Already checked that high >= 2*low - h.resize(high).map_err(|_| CreationError::UsizeTypeTooSmall)?; + h.resize(high) + .map_err(|_| CreationError::UsizeTypeTooSmall)?; Ok(h) } @@ -1748,9 +1752,6 @@ where // TODO: shift // TODO: hash -// TODO: encoding/decoding -// TODO: timestamps and tags -// TODO: textual output #[path = "tests/tests.rs"] #[cfg(test)] diff --git a/src/serialization/benchmarks.rs b/src/serialization/benchmarks.rs index 5bfceaf..972fbd7 100644 --- a/src/serialization/benchmarks.rs +++ b/src/serialization/benchmarks.rs @@ -63,8 +63,10 @@ fn do_varint_write_rand(b: &mut Bencher, range: Range) { } let mut buf = [0; 9]; - b.iter(|| for i in vec.iter() { - let _ = varint_write(*i, &mut buf); + b.iter(|| { + for i in vec.iter() { + let _ = varint_write(*i, &mut buf); + } }); } diff --git a/src/serialization/interval_log/mod.rs b/src/serialization/interval_log/mod.rs new file mode 100644 index 0000000..4ea70a4 --- /dev/null +++ b/src/serialization/interval_log/mod.rs @@ -0,0 +1,717 @@ +//! Interval log parsing and writing. +//! +//! Interval logs, as handled by the Java implementation's `HistogramLogWriter`, +//! `HistogramLogReader`, and `HistogramLogProcessor`, are a way to record a sequence of histograms +//! over time. Suppose you were running a load test for an hour: you might want to record a +//! 
histogram per second or minute so that you could correlate measurements with behavior you might +//! see in logs, etc. +//! +//! An interval log contains some initial metadata, then a sequence of histograms, each with some +//! additional metadata (timestamps, etc). See `IntervalLogHistogram`. +//! +//! The intervals in the log should be ordered by start timestamp. It's possible to write (and +//! parse) logs with intervals in any order, but the expectation is that they will be sorted. +//! +//! To parse a log, see `IntervalLogIterator`. To write a log, see `IntervalLogWriterBuilder`. +//! +//! # Timestamps +//! +//! Each interval has a timestamp in seconds associated with it. However, it's not necessarily as +//! simple as just interpreting the number as seconds since the epoch. +//! +//! There are two optional pieces of header metadata: "StartTime" and "BaseTime". Neither, one, or +//! both of these may be present. It is possible to have multiple StartTime or BaseTime entries in +//! the log, but that is discouraged as it is confusing to interpret. It is also possible to have +//! StartTime and BaseTime interleaved with interval histograms, but that is even more confusing, so +//! this API prevents you from doing so. +//! +//! ### Timestamp options +//! +//! This is a summary of the logic used by the Java impl's `HistogramLogReader` for StartTime and +//! BaseTime. +//! +//! - Neither are present: interval timestamps should be interpreted as seconds since the epoch. +//! - StartTime is present: StartTime is a number of seconds since epoch, and interval timestamps +//! should be interpreted as deltas that could be added to StartTime if seconds since epoch for each +//! interval is needed. +//! - BaseTime is present: same as the case where StartTime is present. It's seconds since epoch, +//! with interval timestamps as deltas. +//! - BaseTime and StartTime are present: The BaseTime is used like it is when it's the only one +//! 
present: it's a number of seconds since epoch that serves as the starting point for the +//! per-interval deltas to get a wall-clock time for each interval. The StartTime is a *different* +//! number of seconds since epoch whose meaning is really up to the user. One hypothetical use might +//! be if you're performing a long-running benchmark and outputting a new interval log every hour. +//! The BaseTime of each log would be the seconds since epoch at the creation time of that log file, +//! but the StartTime would be the same for each file: the time that the benchmark started. Thus, +//! if you wanted to find the interval histogram for 4000 seconds into the benchmark, you would load +//! the second hour's file, add each interval's timestamp to that log's BaseTime, and select the one +//! whose (timestamp + BaseTime) was 4000 bigger than the StartTime. This seems to be how the Java +//! impl uses it: `HistogramLogReader` lets you filter by "non-absolute" start/end time or by +//! "absolute" start/end time. The former uses a range of deltas from StartTime and selects +//! intervals where `interval_timestamp + base_time - start_time` is in the requested range, while +//! the latter uses a range of absolute timestamps and selects via `interval_timestamp + base_time`. +//! +//! ### Timestamp recommendations +//! +//! As you can see from that slab of text, using both BaseTime and StartTime is complex. +//! +//! We suggest one of the following: +//! +//! - Don't use a timestamp header, and simply have each interval's timestamp be the seconds since +//! epoch. +//! - Use StartTime, and have each interval's timestamp be a delta from StartTime. +//! +//! Of course, if you are writing logs that need to work with an existing log processing pipeline, +//! you should use timestamps as expected by that logic, so we provide the ability to have all +//! combinations of timestamp headers if need be. +//! +//! # Examples +//! +//! Parse a single interval from a log. +//! +//! 
``` +//! use std::time; +//! use hdrsample::serialization::interval_log; +//! +//! // two newline-separated log lines: a comment, then an interval +//! let log = b"#I'm a comment\nTag=t,0.127,1.007,2.769,base64EncodedHisto\n"; +//! +//! let mut iter = interval_log::IntervalLogIterator::new(&log[..]); +//! +//! // the comment is consumed and ignored by the parser, so the first event is an Interval +//! match iter.next().unwrap() { +//! Ok(interval_log::LogEntry::Interval(h)) => { +//! assert_eq!(time::Duration::new(0, 127_000_000), h.start_timestamp()); +//! } +//! _ => panic!() +//! } +//! +//! // there are no more lines in the log; iteration complete +//! assert_eq!(None, iter.next()); +//! ``` +//! +//! Skip logs that started before 3 seconds. +//! +//! ``` +//! use hdrsample::serialization::interval_log; +//! +//! let log = "\ +//! #I'm a comment\n\ +//! Tag=a,0.123,1.007,2.769,base64EncodedHisto\n\ +//! 1.456,1.007,2.769,base64EncodedHisto\n\ +//! 3.789,1.007,2.769,base64EncodedHisto\n\ +//! Tag=b,4.123,1.007,2.769,base64EncodedHisto\n\ +//! 5.456,1.007,2.769,base64EncodedHisto\n\ +//! #Another comment\n" +//! .as_bytes(); +//! +//! let iter = interval_log::IntervalLogIterator::new(&log); +//! +//! let count = iter +//! // only look at intervals (which are the only non-comment lines in this log) +//! .filter_map(|e| match e { +//! Ok(interval_log::LogEntry::Interval(ilh)) => Some(ilh), +//! _ => None +//! }) +//! // do any filtering you want +//! .filter(|ilh| ilh.start_timestamp().as_secs() >= 3) +//! .count(); +//! +//! assert_eq!(3, count); +//! ``` +//! +//! Write a log. +//! +//! ``` +//! use std::{str, time}; +//! use hdrsample; +//! use hdrsample::serialization; +//! use hdrsample::serialization::interval_log; +//! +//! let mut buf = Vec::new(); +//! let mut serializer = serialization::V2Serializer::new(); +//! +//! let mut h = hdrsample::Histogram::::new_with_bounds( +//! 1, u64::max_value(), 3).unwrap(); +//! h.record(12345).unwrap(); +//! +//! 
// limit scope of mutable borrow of `buf` +//! { +//! let now = time::SystemTime::now(); +//! let mut log_writer = interval_log::IntervalLogWriterBuilder::new() +//! .add_comment("Comments are great") +//! .with_start_time(now) +//! .begin_log_with(&mut buf, &mut serializer) +//! .unwrap(); +//! +//! log_writer.write_comment( +//! "You can have comments anywhere in the log").unwrap(); +//! +//! log_writer +//! .write_histogram( +//! &h, +//! now.elapsed().unwrap(), +//! time::Duration::new(12, 345_678_901), +//! interval_log::Tag::new("im-a-tag") +//! ) +//! .unwrap(); +//! } +//! +//! // `buf` is now full of stuff; we check for the first line +//! assert_eq!("#Comments are great\n", &str::from_utf8(&buf).unwrap()[0..20]); +//! ``` + +extern crate base64; + +use std::{fmt, io, ops, str, time}; +use std::fmt::Write; + +use nom::{double, is_digit, ErrorKind, IResult}; + +use super::super::{Counter, Histogram}; +use super::Serializer; + +/// Prepare an `IntervalLogWriter`. +/// +/// This type only allows writing comments and headers. Once you're done writing those things, use +/// `into_log_writer()` to convert this into an `IntervalLogWriter`. +pub struct IntervalLogWriterBuilder { + comments: Vec, + start_time: Option, + base_time: Option, + max_value_divisor: f64, +} + +impl IntervalLogWriterBuilder { + /// Create a new log writer that writes to `writer` and serializes histograms with `serializer`. + pub fn new() -> IntervalLogWriterBuilder { + IntervalLogWriterBuilder { + comments: Vec::new(), + start_time: None, + base_time: None, + max_value_divisor: 1.0, + } + } + + /// Add a comment line to be written when the writer is built. + /// + /// Comments containing '\n' will be transformed into multiple lines of comments. + pub fn add_comment(&mut self, s: &str) -> &mut Self { + self.comments.push(s.to_owned()); + self + } + + /// Set a StartTime. See the module-level documentation for more info. 
+ /// + /// This can be called multiple times, but only the value for the most recent invocation will + /// be written. + pub fn with_start_time(&mut self, time: time::SystemTime) -> &mut Self { + self.start_time = Some(system_time_as_fp_seconds(time)); + self + } + + /// Set a BaseTime. See the module-level documentation for more info. + /// + /// This can be called multiple times, but only the value for the most recent invocation will + /// be written. + pub fn with_base_time(&mut self, time: time::SystemTime) -> &mut Self { + self.base_time = Some(system_time_as_fp_seconds(time)); + self + } + + /// Set a max value divisor. + /// + /// This is used to scale down the max value part of an interval log to something that may be + /// more human readable. The max value in the log is only for human consumption, so you might + /// prefer to divide by 10<sup>9</sup> to turn nanoseconds into fractional seconds, for + /// instance. + /// + /// If this is not set, 1.0 will be used. + /// + /// This can be called multiple times, but only the value for the most recent invocation will + /// be written. + pub fn with_max_value_divisor(&mut self, max_value_divisor: f64) -> &mut Self { + self.max_value_divisor = max_value_divisor; + self + } + + /// Build a LogWriter and apply any configured headers. 
+ pub fn begin_log_with<'a, 'b, W: 'a + io::Write, S: 'b + Serializer>( + &self, + writer: &'a mut W, + serializer: &'b mut S, + ) -> Result, io::Error> { + let mut internal_writer = InternalLogWriter { + writer, + serializer, + text_buf: String::new(), + serialize_buf: Vec::new(), + max_value_divisor: self.max_value_divisor, + }; + + for c in &self.comments { + internal_writer.write_comment(&c)?; + } + + if let Some(st) = self.start_time { + internal_writer.write_fmt(format_args!( + "#[StartTime: {:.3} (seconds since epoch)]\n", + st + ))?; + } + + if let Some(bt) = self.base_time { + internal_writer.write_fmt(format_args!( + "#[BaseTime: {:.3} (seconds since epoch)]\n", + bt + ))?; + } + + // The Java impl doesn't write a comment for this but it's confusing to silently modify the + // max value without leaving a trace + if self.max_value_divisor != 1.0_f64 { + internal_writer.write_fmt(format_args!( + "#[MaxValueDivisor: {:.3}]\n", + self.max_value_divisor + ))?; + } + + Ok(IntervalLogWriter { internal_writer }) + } +} + +/// Writes interval histograms in an interval log. +/// +/// This isn't created directly; start with an `IntervalLogWriterBuilder`. Once you've written the +/// headers and ended up with an `IntervalLogWriter`, typical usage would be to write a histogram +/// at regular intervals (e.g. once a second). +/// +/// ``` +/// use hdrsample::serialization; +/// use hdrsample::serialization::interval_log; +/// +/// let mut buf = Vec::new(); +/// let mut serializer = serialization::V2Serializer::new(); +/// +/// // create a writer via a builder +/// let mut writer = interval_log::IntervalLogWriterBuilder::new() +/// .begin_log_with(&mut buf, &mut serializer) +/// .unwrap(); +/// +/// writer.write_comment("Comment 2").unwrap(); +/// +/// // .. 
write some intervals +/// ``` +pub struct IntervalLogWriter<'a, 'b, W: 'a + io::Write, S: 'b + Serializer> { + internal_writer: InternalLogWriter<'a, 'b, W, S>, +} + +impl<'a, 'b, W: 'a + io::Write, S: 'b + Serializer> IntervalLogWriter<'a, 'b, W, S> { + /// Write a comment line. + /// + /// Comments containing '\n' will be transformed into multiple lines of comments. + pub fn write_comment(&mut self, s: &str) -> io::Result<()> { + self.internal_writer.write_comment(s) + } + + /// Write an interval histogram. + /// + /// `start_timestamp` is the time since the epoch in seconds that measurements started being + /// recorded in this interval. If you're using a StartTime or BaseTime offset, you should + /// instead use a delta since that time. See the discussion about timestamps in the module-level + /// documentation. + /// + /// `duration` is the duration of the interval in seconds. + /// + /// `tag` is an optional tag for this histogram. + pub fn write_histogram( + &mut self, + h: &Histogram, + start_timestamp: time::Duration, + duration: time::Duration, + tag: Option, + ) -> Result<(), IntervalLogWriterError> { + self.internal_writer + .write_histogram(h, start_timestamp, duration, tag) + } +} + +/// Errors that can occur while writing a log. +#[derive(Debug)] +pub enum IntervalLogWriterError { + /// Histogram serialization failed. + SerializeError(E), + /// An i/o error occurred. + IoError(io::ErrorKind), +} + +impl From for IntervalLogWriterError { + fn from(e: io::Error) -> Self { + IntervalLogWriterError::IoError(e.kind()) + } +} + +/// Write interval logs. 
+struct InternalLogWriter<'a, 'b, W: 'a + io::Write, S: 'b + Serializer> { + writer: &'a mut W, + serializer: &'b mut S, + text_buf: String, + serialize_buf: Vec, + max_value_divisor: f64, +} + +impl<'a, 'b, W: 'a + io::Write, S: 'b + Serializer> InternalLogWriter<'a, 'b, W, S> { + fn write_fmt(&mut self, args: fmt::Arguments) -> io::Result<()> { + self.writer.write_fmt(args) + } + + fn write_comment(&mut self, s: &str) -> io::Result<()> { + for l in s.split('\n') { + write!(self.writer, "#{}\n", l)?; + } + + Ok(()) + } + + fn write_histogram( + &mut self, + h: &Histogram, + start_timestamp: time::Duration, + duration: time::Duration, + tag: Option, + ) -> Result<(), IntervalLogWriterError> { + self.serialize_buf.clear(); + self.text_buf.clear(); + + if let Some(Tag(s)) = tag { + write!(self.text_buf, "Tag={},", &s).expect("Writes to a String can't fail"); + } + + write!( + self.writer, + "{}{:.3},{:.3},{:.3},", + self.text_buf, + duration_as_fp_seconds(start_timestamp), + duration_as_fp_seconds(duration), + h.max() as f64 / self.max_value_divisor // because the Java impl does it this way + )?; + + self.text_buf.clear(); + let _len = self.serializer + .serialize(h, &mut self.serialize_buf) + .map_err(|e| IntervalLogWriterError::SerializeError(e))?; + base64::encode_config_buf(&self.serialize_buf, base64::STANDARD, &mut self.text_buf); + + self.writer.write_all(self.text_buf.as_bytes())?; + self.writer.write_all(b"\n")?; + + Ok(()) + } +} + +/// A tag for an interval histogram. +/// +/// Tags are just `str`s that do not contain a few disallowed characters: ',', '\r', '\n', and ' '. +/// +/// To get the wrapped `str` back out, use `as_str()` or the `Deref` implementation +/// (`&some_tag`). +#[derive(Debug, PartialEq, Clone, Copy)] +pub struct Tag<'a>(&'a str); + +impl<'a> Tag<'a> { + /// Create a new Tag. + /// + /// If a disallowed character is present, this will return `None`. 
+ pub fn new(s: &'a str) -> Option> { + if s.chars() + .any(|c| c == ',' || c == '\r' || c == '\n' || c == ' ') + { + None + } else { + Some(Tag(s)) + } + } + + /// Returns the tag contents as a str. + pub fn as_str(&self) -> &'a str { + self.0 + } +} + +impl<'a> ops::Deref for Tag<'a> { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +/// An individual interval histogram parsed from an interval log. +#[derive(PartialEq, Debug)] +pub struct IntervalLogHistogram<'a> { + tag: Option>, + start_timestamp: time::Duration, + duration: time::Duration, + max: f64, + encoded_histogram: &'a str, +} + +impl<'a> IntervalLogHistogram<'a> { + /// Tag, if any is present. + pub fn tag(&self) -> Option> { + self.tag + } + + /// Timestamp of the start of the interval in seconds, expressed as a `Duration` relative to + /// some start point. + /// + /// The timestamp may be absolute vs the epoch, or there may be a `StartTime` or `BaseTime` for + /// the log, in which case you may wish to consider this number as a delta vs those timestamps. + /// See the module-level documentation about timestamps. + pub fn start_timestamp(&self) -> time::Duration { + self.start_timestamp + } + + /// Duration of the interval in seconds. + pub fn duration(&self) -> time::Duration { + self.duration + } + + /// Max value in the encoded histogram + /// + /// This max value is the max of the histogram divided by some scaling factor (which may be + /// 1.0). + pub fn max(&self) -> f64 { + self.max + } + + /// Base64-encoded serialized histogram. + /// + /// If you need the deserialized histogram, base64-decode and use a `Deserializer` on the + /// resulting bytes. + pub fn encoded_histogram(&self) -> &'a str { + self.encoded_histogram + } +} + +#[derive(PartialEq, Debug)] +/// Represents one non-comment line in an interval log. +/// +/// One thing to note is that the way your interval timestamps work can vary. 
If your log was +/// written with a StartTime or BaseTime, that metadata will appear in header comments, and that +/// will be represented by the iterator providing the corresponding variants here. The presence +/// of those timestamps will affect how you should interpret the timestamps for individual +/// intervals. See the module-level documentation. +#[allow(variant_size_differences)] +pub enum LogEntry<'a> { + /// Logs may include a StartTime. If present, it represents seconds since the epoch. + StartTime(time::Duration), + /// Logs may include a BaseTime. If present, it represents seconds since the epoch. + BaseTime(time::Duration), + /// An individual interval histogram. + Interval(IntervalLogHistogram<'a>), +} + +/// Errors that occur when parsing an interval log. +#[derive(Debug, PartialEq)] +pub enum LogIteratorError { + /// Parsing failed + ParseError { + /// Offset in the input where the failed parse started + offset: usize, + }, +} + +/// Parse interval logs. +/// +/// This iterator exposes each item (excluding comments and other information-free lines). See +/// `LogEntry`. +/// +/// Because histogram deserialization is deferred, parsing logs is fast. See the `interval_log` +/// benchmark if you wish to see how it does on your hardware. As a baseline, parsing a log of 1000 +/// random histograms of 10,000 values each takes 2ms total on an E5-1650v3. +/// +/// Deferring deserialization is handy because it allows you to cheaply navigate the log to find +/// the records you care about (e.g. ones in a certain time range, or with a certain tag) without +/// doing all the allocation, etc, of deserialization. +/// +/// If you're looking for a direct port of the Java impl's `HistogramLogReader`, this isn't one: it +/// won't deserialize for you, and it pushes the burden of figuring out what to do with StartTime, +/// BaseTime, etc to you, and there aren't built in functions to filter by timestamp. 
On the other +/// hand, because it doesn't do those things, it is much more flexible: you can easily build any +/// sort of filtering you want, not just timestamp ranges, because you have cheap access to all the +/// metadata before incurring the cost of deserialization. If you're not using any timestamp +/// headers, or at least using them in straightforward ways, it is easy to accumulate the +/// timestamp state you need. Since all the parsing is taken care of already, writing your own +/// `HistogramLogReader` equivalent that fits the way your logs are assembled is just a couple of +/// lines. (And if you're doing complex stuff, we probably wouldn't have built something that fits +/// your quirky logs anyway!) +/// +/// This parses from a slice representing the complete file because it made implementation easier +/// (and also supports mmap'd files for maximum parsing speed). If parsing from a `Read` is +/// important for your use case, open an issue about it. +pub struct IntervalLogIterator<'a> { + orig_len: usize, + input: &'a [u8], + ended: bool, +} + +impl<'a> IntervalLogIterator<'a> { + /// Create a new iterator from the UTF-8 bytes of an interval log. 
+ pub fn new(input: &'a [u8]) -> IntervalLogIterator<'a> { + IntervalLogIterator { + orig_len: input.len(), + input, + ended: false, + } + } +} + +impl<'a> Iterator for IntervalLogIterator<'a> { + type Item = Result, LogIteratorError>; + + fn next(&mut self) -> Option { + loop { + if self.ended { + return None; + } + + if self.input.is_empty() { + self.ended = true; + return None; + } + + // Look for magic comments first otherwise they will get matched by the simple comment + // parser + if let IResult::Done(rest, e) = log_entry(self.input) { + self.input = rest; + return Some(Ok(e)); + } + + // it wasn't a log entry; try parsing a comment + match ignored_line(self.input) { + IResult::Done(rest, _) => { + self.input = rest; + continue; + } + _ => { + self.ended = true; + return Some(Err(LogIteratorError::ParseError { + offset: self.orig_len - self.input.len(), + })); + } + } + } + } +} + +fn duration_as_fp_seconds(d: time::Duration) -> f64 { + d.as_secs() as f64 + d.subsec_nanos() as f64 / 1_000_000_000_f64 +} + +fn system_time_as_fp_seconds(time: time::SystemTime) -> f64 { + match time.duration_since(time::UNIX_EPOCH) { + Ok(dur_after_epoch) => duration_as_fp_seconds(dur_after_epoch), + // Doesn't seem possible to be before the epoch, but using a negative number seems like + // a reasonable representation if it does occur + Err(t) => duration_as_fp_seconds(t.duration()) * -1_f64, + } +} + +named!(start_time<&[u8], LogEntry>, + do_parse!( + tag!("#[StartTime: ") >> + dur: fract_sec_duration >> + char!(' ') >> + take_until_and_consume!("\n") >> + (LogEntry::StartTime(dur)) +)); + +named!(base_time<&[u8], LogEntry>, + do_parse!( + tag!("#[BaseTime: ") >> + dur: fract_sec_duration >> + char!(' ') >> + take_until_and_consume!("\n") >> + (LogEntry::BaseTime(dur)) +)); + +named!(interval_hist<&[u8], LogEntry>, + do_parse!( + tag: opt!( + map!( + map_res!( + map!(pair!(tag!("Tag="), take_until_and_consume!(",")), |p| p.1), + str::from_utf8), + |s| Tag(s))) >> + 
start_timestamp: fract_sec_duration >> + char!(',') >> + duration: fract_sec_duration >> + char!(',') >> + max: double >> + char!(',') >> + encoded_histogram: map_res!(take_until_and_consume!("\n"), str::from_utf8) >> + (LogEntry::Interval(IntervalLogHistogram { + tag, + start_timestamp, + duration, + max, + encoded_histogram + })) + ) +); + +named!(log_entry<&[u8], LogEntry>, + alt_complete!(start_time | base_time | interval_hist)); + +named!(comment_line<&[u8], ()>, + do_parse!(tag!("#") >> take_until_and_consume!("\n") >> (())) +); + +named!(legend<&[u8], ()>, + do_parse!(tag!("\"StartTimestamp\"") >> take_until_and_consume!("\n") >> (())) +); + +named!(ignored_line<&[u8], ()>, alt!(comment_line | legend)); + +fn fract_sec_duration(input: &[u8]) -> IResult<&[u8], time::Duration> { + match fract_sec_tuple(input) { + IResult::Done(rest, data) => { + let (secs, nanos_str) = data; + + // only read up to 9 digits since we can only support nanos, not smaller precision + let nanos_parse_res = if nanos_str.len() > 9 { + nanos_str[0..9].parse::() + } else if nanos_str.len() == 9 { + nanos_str.parse::() + } else { + nanos_str + .parse::() + // subtraction will not overflow because len is < 9 + .map(|n| n * 10_u32.pow(9 - nanos_str.len() as u32)) + }; + + if let Ok(nanos) = nanos_parse_res { + return IResult::Done(rest, time::Duration::new(secs, nanos)); + } + + // nanos were invalid utf8. We don't expose these errors, so don't bother defining a + // custom error type. 
+ return IResult::Error(ErrorKind::Custom(0)); + } + IResult::Error(e) => return IResult::Error(e), + IResult::Incomplete(n) => return IResult::Incomplete(n), + } +} + +named!(fract_sec_tuple<&[u8], (u64, &str)>, + do_parse!( + secs: flat_map!(recognize!(take_until!(".")), parse_to!(u64)) >> + tag!(".") >> + nanos_str: map_res!(take_while1!(is_digit), str::from_utf8) >> + (secs, nanos_str) + ) +); + +#[cfg(test)] +mod tests; diff --git a/src/serialization/interval_log/tests.rs b/src/serialization/interval_log/tests.rs new file mode 100644 index 0000000..518a0b9 --- /dev/null +++ b/src/serialization/interval_log/tests.rs @@ -0,0 +1,459 @@ +extern crate rand; + +use self::rand::Rng; + +use std::{iter, time}; +use std::ops::Add; + +use super::super::super::*; +use super::super::*; +use super::*; + +#[test] +fn write_header_comment() { + let mut buf = Vec::new(); + let mut serializer = V2Serializer::new(); + + let _ = IntervalLogWriterBuilder::new() + .add_comment("foo") + .begin_log_with(&mut buf, &mut serializer) + .unwrap(); + + assert_eq!(&b"#foo\n"[..], &buf[..]); +} + +#[test] +fn write_header_then_interval_comment() { + let mut buf = Vec::new(); + let mut serializer = V2Serializer::new(); + + { + let mut log_writer = IntervalLogWriterBuilder::new() + .add_comment("foo") + .add_comment("bar") + .begin_log_with(&mut buf, &mut serializer) + .unwrap(); + log_writer.write_comment("baz").unwrap(); + } + + assert_eq!("#foo\n#bar\n#baz\n", str::from_utf8(&buf[..]).unwrap()); +} + +#[test] +fn write_comment_control_characters_still_parseable() { + let mut buf = Vec::new(); + let mut serializer = V2Serializer::new(); + + let mut control_chars = String::new(); + + // control chars are U+0000-001F, 7F, 80-9F + for c_byte in (0..0x20_u8).chain(iter::once(0x7F)).chain(0x80..0xA0) { + let c = c_byte as char; + assert!(c.is_control()); + control_chars.push(c); + } + + assert_eq!(2 * 16 + 1 + 2 * 16, control_chars.chars().count()); + + { + let mut log_writer = 
IntervalLogWriterBuilder::new() + .add_comment("unicode") + .add_comment(&control_chars) + .add_comment("whew") + .with_start_time(system_time_after_epoch(123, 456_000_000)) + .begin_log_with(&mut buf, &mut serializer) + .unwrap(); + log_writer.write_comment("baz").unwrap(); + } + + let before_newline = &control_chars[0..10]; + let after_newline = &control_chars[11..]; + let expected = format!( + "#unicode\n#{}\n#{}\n#whew\n#[StartTime: 123.456 (seconds since epoch)]\n#baz\n", + before_newline, + after_newline + ); + assert_eq!(&expected, str::from_utf8(&buf[..]).unwrap()); + + let mut i = IntervalLogIterator::new(&buf); + assert_eq!( + Some(Ok( + LogEntry::StartTime(time::Duration::new(123, 456_000_000)) + )), + i.next() + ); + assert_eq!(None, i.next()); +} + +#[test] +fn write_comment_newline_wraps() { + let mut buf = Vec::new(); + let mut serializer = V2Serializer::new(); + + { + let _ = IntervalLogWriterBuilder::new() + .add_comment("before") + .add_comment("new\nline") + .add_comment("after") + .begin_log_with(&mut buf, &mut serializer) + .unwrap(); + } + + assert_eq!( + "#before\n#new\n#line\n#after\n", + str::from_utf8(&buf[..]).unwrap() + ); +} + +#[test] +fn write_headers_multiple_times_only_last_is_used() { + let mut buf = Vec::new(); + let mut serializer = V2Serializer::new(); + + { + let _ = IntervalLogWriterBuilder::new() + .with_start_time(system_time_after_epoch(10, 0)) + .with_base_time(system_time_after_epoch(20, 0)) + .with_start_time(system_time_after_epoch(100, 0)) + .with_base_time(system_time_after_epoch(200, 0)) + .with_max_value_divisor(1_000.0) + .with_max_value_divisor(1_000_000.0) + .begin_log_with(&mut buf, &mut serializer) + .unwrap(); + } + + let expected = "\ + #[StartTime: 100.000 (seconds since epoch)]\n\ + #[BaseTime: 200.000 (seconds since epoch)]\n\ + #[MaxValueDivisor: 1000000.000]\n"; + + assert_eq!(expected, str::from_utf8(&buf[..]).unwrap()); +} + +#[test] +fn write_interval_histo_no_tag() { + let mut buf = Vec::new(); + let 
mut serializer = V2Serializer::new(); + + let mut h = Histogram::::new_with_bounds(1, u64::max_value(), 3).unwrap(); + h.record(1000).unwrap(); + + { + let mut log_writer = IntervalLogWriterBuilder::new() + .with_max_value_divisor(10.0) + .begin_log_with(&mut buf, &mut serializer) + .unwrap(); + + log_writer + .write_histogram( + &h, + time::Duration::new(1, 234_567_890), + time::Duration::new(5, 670_000_000), + None, + ) + .unwrap(); + } + + let expected = "\ + #[MaxValueDivisor: 10.000]\n\ + 1.235,5.670,100.000,HISTEwAAAAMAAAAAAAAAAwAAAAAAAAAB//////////8/8AAAAAAAAM8PAg==\n"; + + assert_eq!(expected, str::from_utf8(&buf[..]).unwrap()); +} + +#[test] +fn write_interval_histo_with_tag() { + let mut buf = Vec::new(); + let mut serializer = V2Serializer::new(); + + let h = Histogram::::new_with_bounds(1, u64::max_value(), 3).unwrap(); + + { + let mut log_writer = IntervalLogWriterBuilder::new() + .begin_log_with(&mut buf, &mut serializer) + .unwrap(); + + log_writer + .write_histogram( + &h, + time::Duration::new(1, 234_000_000), + time::Duration::new(5, 678_000_000), + Tag::new("t"), + ) + .unwrap(); + } + + assert_eq!( + "Tag=t,1.234,5.678,0.000,HISTEwAAAAEAAAAAAAAAAwAAAAAAAAAB//////////8/8AAAAAAAAAA=\n", + str::from_utf8(&buf[..]).unwrap() + ); +} + +#[test] +fn write_start_time() { + let mut buf = Vec::new(); + let mut serializer = V2Serializer::new(); + + let _ = IntervalLogWriterBuilder::new() + .with_start_time(system_time_after_epoch(123, 456_789_012)) + .begin_log_with(&mut buf, &mut serializer) + .unwrap(); + + assert_eq!( + "#[StartTime: 123.457 (seconds since epoch)]\n", + str::from_utf8(&buf[..]).unwrap() + ); +} + +#[test] +fn write_base_time() { + let mut buf = Vec::new(); + let mut serializer = V2Serializer::new(); + + { + let _ = IntervalLogWriterBuilder::new() + .with_base_time(system_time_after_epoch(123, 456_789_012)) + .begin_log_with(&mut buf, &mut serializer) + .unwrap(); + } + + assert_eq!( + "#[BaseTime: 123.457 (seconds since epoch)]\n", + 
str::from_utf8(&buf[..]).unwrap() + ); +} + +#[test] +fn parse_duration_full_ns() { + let (rest, dur) = fract_sec_duration(b"123456.789012345foo").unwrap(); + + assert_eq!(time::Duration::new(123456, 789_012_345), dur); + assert_eq!(b"foo", rest); +} + +#[test] +fn parse_duration_scale_ns() { + let (rest, dur) = fract_sec_duration(b"123456.789012foo").unwrap(); + + assert_eq!(time::Duration::new(123456, 789_012_000), dur); + assert_eq!(b"foo", rest); +} + +#[test] +fn parse_duration_too_many_ns() { + let (rest, dur) = fract_sec_duration(b"123456.7890123456foo").unwrap(); + + // consumes all the numbers, but only parses the first 9 + assert_eq!(time::Duration::new(123456, 789_012_345), dur); + assert_eq!(b"foo", rest); +} + +#[test] +fn duration_fp_roundtrip_accuracy() { + let mut rng = rand::thread_rng(); + + let mut buf = String::new(); + let mut errors = Vec::new(); + for _ in 0..100_000 { + buf.clear(); + + // pick seconds + let secs = rng.gen_range(0, 2_000_000_000); + // pick nsecs that only has ms accuracy + let nsecs = rng.gen_range(0, 1000) * 1000_000; + + let dur = time::Duration::new(secs, nsecs); + let fp_secs = duration_as_fp_seconds(dur); + + write!(&mut buf, "{:.3}", fp_secs).unwrap(); + + let (_, dur2) = fract_sec_duration(buf.as_bytes()).unwrap(); + + if dur != dur2 { + errors.push((dur, dur2)); + } + } + + if !errors.is_empty() { + for &(dur, dur2) in &errors { + println!("{:?} -> {:?}", dur, dur2); + } + } + + assert_eq!(0, errors.len()); +} + +#[test] +fn parse_start_time_with_human_date() { + let (rest, e) = start_time( + b"#[StartTime: 1441812279.474 (seconds since epoch), Wed Sep 09 08:24:39 PDT 2015]\nfoo", + ).unwrap(); + + let expected = LogEntry::StartTime(time::Duration::new(1441812279, 474_000_000)); + + assert_eq!(expected, e); + assert_eq!(b"foo", rest); +} + +#[test] +fn parse_start_time_without_human_date() { + // Can't be bothered to format a timestamp for humans, so we don't write that data. 
It's just + // another part that could be wrong -- what if it disagrees with the seconds since epoch? + // Also, BaseTime doesn't have a human-formatted time. + let (rest, e) = start_time(b"#[StartTime: 1441812279.474 (seconds since epoch)]\nfoo").unwrap(); + + let expected = LogEntry::StartTime(time::Duration::new(1441812279, 474_000_000)); + + assert_eq!(expected, e); + assert_eq!(b"foo", rest); +} + +#[test] +fn parse_base_time() { + let (rest, e) = base_time(b"#[BaseTime: 1441812279.474 (seconds since epoch)]\nfoo").unwrap(); + + let expected = LogEntry::BaseTime(time::Duration::new(1441812279, 474_000_000)); + + assert_eq!(expected, e); + assert_eq!(b"foo", rest); +} + +#[test] +fn parse_legend() { + let input = b"\"StartTimestamp\",\"Interval_Length\",\"Interval_Max\",\ + \"Interval_Compressed_Histogram\"\nfoo"; + let (rest, _) = legend(input).unwrap(); + + assert_eq!(b"foo", rest); +} + +#[test] +fn parse_comment() { + let (rest, _) = comment_line(b"#SomeOtherComment\nfoo").unwrap(); + + assert_eq!(b"foo", rest); +} + +#[test] +fn parse_interval_hist_no_tag() { + let (rest, e) = interval_hist(b"0.127,1.007,2.769,couldBeBase64\nfoo").unwrap(); + + let expected = LogEntry::Interval(IntervalLogHistogram { + tag: None, + start_timestamp: time::Duration::new(0, 127_000_000), + duration: time::Duration::new(1, 7_000_000), + max: 2.769, + encoded_histogram: "couldBeBase64", + }); + + assert_eq!(expected, e); + assert_eq!(b"foo", rest); +} + +#[test] +fn parse_interval_hist_with_tag() { + let (rest, e) = interval_hist(b"Tag=t,0.127,1.007,2.769,couldBeBase64\nfoo").unwrap(); + + let expected = LogEntry::Interval(IntervalLogHistogram { + tag: Some(Tag("t")), + start_timestamp: time::Duration::new(0, 127_000_000), + duration: time::Duration::new(1, 7_000_000), + max: 2.769, + encoded_histogram: "couldBeBase64", + }); + + assert_eq!(expected, e); + assert_eq!(b"foo", rest); +} + +#[test] +fn iter_with_ignored_prefix() { + let mut data = Vec::new(); + 
data.extend_from_slice(b"#I'm a comment\n"); + data.extend_from_slice(b"\"StartTimestamp\",etc\n"); + data.extend_from_slice(b"Tag=t,0.127,1.007,2.769,couldBeBase64\n"); + data.extend_from_slice(b"#[StartTime: 1441812279.474 ...\n"); + + let entries: Vec = IntervalLogIterator::new(&data) + .map(|r| r.unwrap()) + .collect(); + + let expected0 = LogEntry::Interval(IntervalLogHistogram { + tag: Some(Tag("t")), + start_timestamp: time::Duration::new(0, 127_000_000), + duration: time::Duration::new(1, 7_000_000), + max: 2.769, + encoded_histogram: "couldBeBase64", + }); + + let expected1 = LogEntry::StartTime(time::Duration::new(1441812279, 474_000_000)); + + assert_eq!(vec![expected0, expected1], entries) +} + +#[test] +fn iter_without_ignored_prefix() { + let mut data = Vec::new(); + data.extend_from_slice(b"Tag=t,0.127,1.007,2.769,couldBeBase64\n"); + data.extend_from_slice(b"#[StartTime: 1441812279.474 ...\n"); + + let entries: Vec = IntervalLogIterator::new(&data) + .map(|r| r.unwrap()) + .collect(); + + let expected0 = LogEntry::Interval(IntervalLogHistogram { + tag: Some(Tag("t")), + start_timestamp: time::Duration::new(0, 127_000_000), + duration: time::Duration::new(1, 7_000_000), + max: 2.769, + encoded_histogram: "couldBeBase64", + }); + + let expected1 = LogEntry::StartTime(time::Duration::new(1441812279, 474_000_000)); + + assert_eq!(vec![expected0, expected1], entries) +} + +#[test] +fn iter_multiple_entrties_with_interleaved_ignored() { + let mut data = Vec::new(); + data.extend_from_slice(b"#I'm a comment\n"); + data.extend_from_slice(b"\"StartTimestamp\",etc\n"); + data.extend_from_slice(b"Tag=t,0.127,1.007,2.769,couldBeBase64\n"); + data.extend_from_slice(b"#Another comment\n"); + data.extend_from_slice(b"#[StartTime: 1441812279.474 ...\n"); + data.extend_from_slice(b"#Yet another comment\n"); + data.extend_from_slice(b"#[BaseTime: 1441812279.474 ...\n"); + data.extend_from_slice(b"#Enough with the comments\n"); + + let entries: Vec = 
IntervalLogIterator::new(&data) + .map(|r| r.unwrap()) + .collect(); + + let expected0 = LogEntry::Interval(IntervalLogHistogram { + tag: Some(Tag("t")), + start_timestamp: time::Duration::new(0, 127_000_000), + duration: time::Duration::new(1, 7_000_000), + max: 2.769, + encoded_histogram: "couldBeBase64", + }); + + let expected1 = LogEntry::StartTime(time::Duration::new(1441812279, 474_000_000)); + let expected2 = LogEntry::BaseTime(time::Duration::new(1441812279, 474_000_000)); + + assert_eq!(vec![expected0, expected1, expected2], entries) +} + +#[test] +fn iter_all_ignored_empty_iter() { + let mut data = Vec::new(); + data.extend_from_slice(b"#I'm a comment\n"); + data.extend_from_slice(b"\"StartTimestamp\",etc\n"); + data.extend_from_slice(b"#Another comment\n"); + + assert_eq!(0, IntervalLogIterator::new(&data).count()); +} + +fn system_time_after_epoch(secs: u64, nanos: u32) -> time::SystemTime { + time::UNIX_EPOCH.add(time::Duration::new(secs, nanos)) +} diff --git a/src/serialization/mod.rs b/src/serialization/mod.rs index 8f9bfe5..2bd16fd 100644 --- a/src/serialization/mod.rs +++ b/src/serialization/mod.rs @@ -1,4 +1,4 @@ -//! # Serialization/deserialization +//! Serialization/deserialization support. //! //! The upstream Java project has established several different types of serialization. We have //! currently implemented V2 and V2 + DEFLATE (following the names used by the Java implementation). @@ -49,6 +49,10 @@ //! `Read`. This should make it easy to use them in almost any context, as everything from i/o //! streams to `Vec` can be a `Read` or `Write`. //! +//! # Interval logs +//! +//! See the `interval_log` module. +//! //! ### Integration with general-purpose serialization libraries //! //! In general, serializing histograms should be straightforward: pick the serialization format @@ -70,27 +74,30 @@ //! //! ``` //! use hdrsample::Histogram; -//! use hdrsample::serialization::{Deserializer, V2Serializer}; -//! -//! 
// part of serde, simplified -//! trait Serializer { -//! // ... -//! fn serialize_bytes(self, value: &[u8]) -> Result<(), ()>; -//! // ... -//! } +//! use hdrsample::serialization::{Serializer, V2Serializer}; +//! +//! mod serde { +//! // part of serde, simplified +//! pub trait Serializer { +//! // ... +//! fn serialize_bytes(self, value: &[u8]) -> Result<(), ()>; +//! // ... +//! } //! -//! // also in serde -//! trait Serialize { -//! fn serialize(&self, serializer: S) -> Result<(), ()>; +//! // also in serde +//! pub trait Serialize { +//! fn serialize(&self, serializer: S) -> Result<(), ()>; +//! } //! } //! //! // your custom wrapper +//! #[allow(dead_code)] // to muffle warnings compiling this example //! struct V2HistogramWrapper { //! histogram: Histogram //! } //! -//! impl Serialize for V2HistogramWrapper { -//! fn serialize(&self, serializer: S) -> Result<(), ()> { +//! impl serde::Serialize for V2HistogramWrapper { +//! fn serialize(&self, serializer: S) -> Result<(), ()> { //! // Not optimal to not re-use the vec and serializer, but it'll work //! let mut vec = Vec::new(); //! // Pick the serialization format you want to use. Here, we use plain V2, but V2 + @@ -111,13 +118,13 @@ //! //! ``` //! use hdrsample::Histogram; -//! use hdrsample::serialization::{Deserializer, V2Serializer}; +//! use hdrsample::serialization::{Deserializer, Serializer, V2Serializer}; //! //! let mut vec = Vec::new(); //! let orig_histogram = Histogram::::new(1).unwrap(); //! V2Serializer::new().serialize(&orig_histogram, &mut vec).unwrap(); //! -//! let histogram: Histogram = Deserializer::new() +//! let _histogram: Histogram = Deserializer::new() //! .deserialize(&mut vec.as_slice()).unwrap(); //! ``` //! @@ -126,7 +133,7 @@ //! //! ``` //! use hdrsample::Histogram; -//! use hdrsample::serialization::{Deserializer, V2Serializer}; +//! use hdrsample::serialization::{Deserializer, Serializer, V2Serializer}; //! use std::io::Cursor; //! //! 
// Naturally, do real error handling instead of unwrap() everywhere @@ -137,7 +144,7 @@ //! // Make some histograms //! for _ in 0..num_histograms { //! let mut h = Histogram::::new_with_bounds(1, u64::max_value(), 3).unwrap(); -//! h.record_n(42, 7); +//! h.record_n(42, 7).unwrap(); //! histograms.push(h); //! } //! @@ -174,6 +181,10 @@ extern crate byteorder; extern crate flate2; +use std::{fmt, io}; + +use super::{Counter, Histogram}; + #[cfg(test)] mod tests; @@ -189,6 +200,8 @@ pub use self::v2_deflate_serializer::{V2DeflateSerializeError, V2DeflateSerializ mod deserializer; pub use self::deserializer::{DeserializeError, Deserializer}; +pub mod interval_log; + const V2_COOKIE_BASE: u32 = 0x1c84_9303; const V2_COMPRESSED_COOKIE_BASE: u32 = 0x1c84_9304; @@ -196,3 +209,21 @@ const V2_COOKIE: u32 = V2_COOKIE_BASE | 0x10; const V2_COMPRESSED_COOKIE: u32 = V2_COMPRESSED_COOKIE_BASE | 0x10; const V2_HEADER_SIZE: usize = 40; + +/// Histogram serializer. +/// +/// Different implementations serialize to different formats. +pub trait Serializer { + /// Error type returned when serialization fails. + type SerializeError: fmt::Debug; + + /// Serialize the histogram into the provided writer. + /// Returns the number of bytes written, or an error. + /// + /// Note that `Vec` is a reasonable `Write` implementation for simple usage. + fn serialize( + &mut self, + h: &Histogram, + writer: &mut W, + ) -> Result; +} diff --git a/src/serialization/rand_varint.rs b/src/serialization/rand_varint.rs new file mode 100644 index 0000000..74b5dbb --- /dev/null +++ b/src/serialization/rand_varint.rs @@ -0,0 +1,129 @@ +//! This is used in tests (both unit tests and integration tests) to provide useful distributions +//! of random numbers. 
+ +extern crate rand; + +use self::rand::Rng; +use self::rand::distributions::range::Range; +use self::rand::distributions::IndependentSample; + +/// Smallest number in our varint encoding that takes the given number of bytes +pub fn smallest_number_in_n_byte_varint(byte_length: usize) -> u64 { + assert!(byte_length <= 9 && byte_length >= 1); + + match byte_length { + 1 => 0, + // one greater than the largest of the previous length + _ => largest_number_in_n_byte_varint(byte_length - 1) + 1, + } +} + +/// Largest number in our varint encoding that takes the given number of bytes +pub fn largest_number_in_n_byte_varint(byte_length: usize) -> u64 { + assert!(byte_length <= 9 && byte_length >= 1); + + match byte_length { + 9 => u64::max_value(), + _ => largest_number_in_7_bit_chunk(byte_length - 1), + } +} + +/// The largest in the set of numbers that have at least 1 bit set in the n'th chunk of 7 bits. +fn largest_number_in_7_bit_chunk(chunk_index: usize) -> u64 { + // Our 9-byte varints do different encoding in the last byte, so we don't handle them here + assert!(chunk_index <= 7); + + // 1 in every bit below the lowest bit in this chunk + let lower_bits = match chunk_index { + 0 => 0, + _ => largest_number_in_7_bit_chunk(chunk_index - 1), + }; + + // 1 in every bit in this chunk + let this_chunk = 0x7F_u64 << (chunk_index * 7); + + lower_bits | this_chunk +} + +// Evenly distributed random numbers end up biased heavily towards longer encoded byte lengths: +// there are a lot more large numbers than there are small (duh), but for exercising serialization +// code paths, we'd like many at all byte lengths. This is also arguably more representative of +// real data. This should emit values whose varint lengths are uniformly distributed across the +// whole length range (1 to 9). 
+pub struct RandomVarintEncodedLengthIter { + ranges: [Range; 9], + range_for_picking_range: Range, + rng: R, +} + +impl RandomVarintEncodedLengthIter { + pub fn new(rng: R) -> RandomVarintEncodedLengthIter { + RandomVarintEncodedLengthIter { + ranges: [ + Range::new( + smallest_number_in_n_byte_varint(1), + largest_number_in_n_byte_varint(1) + 1, + ), + Range::new( + smallest_number_in_n_byte_varint(2), + largest_number_in_n_byte_varint(2) + 1, + ), + Range::new( + smallest_number_in_n_byte_varint(3), + largest_number_in_n_byte_varint(3) + 1, + ), + Range::new( + smallest_number_in_n_byte_varint(4), + largest_number_in_n_byte_varint(4) + 1, + ), + Range::new( + smallest_number_in_n_byte_varint(5), + largest_number_in_n_byte_varint(5) + 1, + ), + Range::new( + smallest_number_in_n_byte_varint(6), + largest_number_in_n_byte_varint(6) + 1, + ), + Range::new( + smallest_number_in_n_byte_varint(7), + largest_number_in_n_byte_varint(7) + 1, + ), + Range::new( + smallest_number_in_n_byte_varint(8), + largest_number_in_n_byte_varint(8) + 1, + ), + Range::new( + smallest_number_in_n_byte_varint(9), + largest_number_in_n_byte_varint(9), + ), + ], + range_for_picking_range: Range::new(0, 9), + rng, + } + } +} + +impl Iterator for RandomVarintEncodedLengthIter { + type Item = u64; + + fn next(&mut self) -> Option { + // pick the range we'll use + let value_range = self.ranges[self.range_for_picking_range.ind_sample(&mut self.rng)]; + + Some(value_range.ind_sample(&mut self.rng)) + } +} + +#[test] +fn largest_number_in_7_bit_chunk_correct() { + // 8 chunks (indices 0-7) of 7 bits gets you to 56 bits. Last byte in varint is handled + // differently, so we don't test that here. 
+ for i in 0..8 { + let largest = largest_number_in_7_bit_chunk(i); + assert_eq!((i as u32 + 1) * 7, largest.count_ones()); + + assert_eq!(64 - ((i as u32) + 1) * 7, largest.leading_zeros()); + // any larger and it will be in the next chunk + assert_eq!(largest.leading_zeros() - 1, (largest + 1).leading_zeros()); + } +} diff --git a/src/serialization/test_serialize_trait.rs b/src/serialization/test_serialize_trait.rs deleted file mode 100644 index 263fb25..0000000 --- a/src/serialization/test_serialize_trait.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Maybe someday there will be an obvious right answer for what serialization should look like, at -// least to the user, but for now we'll only take an easily reversible step towards that. There are -// still several ways the serializer interfaces could change to achieve better performance, so -// committing to anything right now would be premature. -trait TestOnlyHypotheticalSerializerInterface { - type SerializeError: Debug; - - fn serialize(&mut self, h: &Histogram, writer: &mut W) - -> Result; -} - -impl TestOnlyHypotheticalSerializerInterface for V2Serializer { - type SerializeError = V2SerializeError; - - fn serialize(&mut self, h: &Histogram, writer: &mut W) -> Result { - self.serialize(h, writer) - } -} - -impl TestOnlyHypotheticalSerializerInterface for V2DeflateSerializer { - type SerializeError = V2DeflateSerializeError; - - fn serialize(&mut self, h: &Histogram, writer: &mut W) -> Result { - self.serialize(h, writer) - } -} diff --git a/src/serialization/tests.rs b/src/serialization/tests.rs index 177cc6f..a314d71 100644 --- a/src/serialization/tests.rs +++ b/src/serialization/tests.rs @@ -1,7 +1,7 @@ extern crate rand; -use super::{V2DeflateSerializeError, V2DeflateSerializer, V2SerializeError, V2Serializer, - V2_COOKIE, V2_HEADER_SIZE}; +use super::{Serializer, V2DeflateSerializer, V2SerializeError, V2Serializer, V2_COOKIE, + V2_HEADER_SIZE}; use super::v2_serializer::{counts_array_max_encoded_size, 
encode_counts, varint_write, zig_zag_encode}; use super::deserializer::{varint_read, varint_read_slice, zig_zag_decode, Deserializer}; @@ -9,13 +9,18 @@ use super::byteorder::{BigEndian, ReadBytesExt}; use super::super::{Counter, Histogram}; use num::ToPrimitive; use super::super::tests::helpers::histo64; -use std::io::{Cursor, Write}; +use std::io::Cursor; use std::fmt::{Debug, Display}; use std::iter::once; use self::rand::{Rand, Rng}; use self::rand::distributions::range::{Range, SampleRange}; use self::rand::distributions::IndependentSample; +use self::rand_varint::*; + +#[path = "rand_varint.rs"] +mod rand_varint; + #[test] fn serialize_all_zeros() { let h = histo64(1, 2047, 3); @@ -479,22 +484,6 @@ fn zig_zag_roundtrip_random() { } } -// Test that varint test helpers are correct - -#[test] -fn largest_number_in_7_bit_chunk_correct() { - // 8 chunks (indices 0-7) of 7 bits gets you to 56 bits. Last byte in varint is handled - // differently, so we don't test that here. - for i in 0..8 { - let largest = largest_number_in_7_bit_chunk(i); - assert_eq!((i as u32 + 1) * 7, largest.count_ones()); - - assert_eq!(64 - ((i as u32) + 1) * 7, largest.leading_zeros()); - // any larger and it will be in the next chunk - assert_eq!(largest.leading_zeros() - 1, (largest + 1).leading_zeros()); - } -} - fn do_varint_write_read_roundtrip_rand(byte_length: usize) { assert!(byte_length <= 9 && byte_length >= 1); @@ -553,7 +542,7 @@ fn do_varint_write_read_slice_roundtrip_rand(byte_length: usize) { fn do_serialize_roundtrip_random(mut serializer: S, max_count: T) where - S: TestOnlyHypotheticalSerializerInterface, + S: Serializer, T: Counter + Debug + Display + Rand + ToPrimitive + SampleRange, { let mut d = Deserializer::new(); @@ -635,45 +624,6 @@ fn assert_deserialized_histogram_matches_orig( ); } -/// Smallest number in our varint encoding that takes the given number of bytes -fn smallest_number_in_n_byte_varint(byte_length: usize) -> u64 { - assert!(byte_length <= 9 && 
byte_length >= 1); - - match byte_length { - 1 => 0, - // one greater than the largest of the previous length - _ => largest_number_in_n_byte_varint(byte_length - 1) + 1, - } -} - -/// Largest number in our varint encoding that takes the given number of bytes -fn largest_number_in_n_byte_varint(byte_length: usize) -> u64 { - assert!(byte_length <= 9 && byte_length >= 1); - - match byte_length { - 9 => u64::max_value(), - _ => largest_number_in_7_bit_chunk(byte_length - 1), - } -} - -/// The largest in the set of numbers that have at least 1 bit set in the n'th chunk of 7 bits. -fn largest_number_in_7_bit_chunk(chunk_index: usize) -> u64 { - // Our 9-byte varints do different encoding in the last byte, so we don't handle them here - assert!(chunk_index <= 7); - - // 1 in every bit below the lowest bit in this chunk - let lower_bits = match chunk_index { - 0 => 0, - _ => largest_number_in_7_bit_chunk(chunk_index - 1), - }; - - // 1 in every bit in this chunk - let this_chunk = 0x7F_u64 << (chunk_index * 7); - - lower_bits | this_chunk -} - - struct RandomRangeIter { range: Range, rng: R, @@ -681,10 +631,7 @@ struct RandomRangeIter { impl RandomRangeIter { fn new(rng: R, range: Range) -> RandomRangeIter { - RandomRangeIter { - rng: rng, - range: range, - } + RandomRangeIter { rng, range } } } @@ -695,74 +642,3 @@ impl Iterator for RandomRangeIter { Some(self.range.ind_sample(&mut self.rng)) } } - -// Evenly distributed random numbers end up biased heavily towards longer encoded byte lengths: -// there are a lot more large numbers than there are small (duh), but for exercising serialization -// code paths, we'd like many at all byte lengths. This is also arguably more representative of -// real data. This should emit values whose varint lengths are uniformly distributed across the -// whole length range (1 to 9). 
-struct RandomVarintEncodedLengthIter { - ranges: [Range; 9], - range_for_picking_range: Range, - rng: R, -} - -impl RandomVarintEncodedLengthIter { - fn new(rng: R) -> RandomVarintEncodedLengthIter { - RandomVarintEncodedLengthIter { - ranges: [ - Range::new( - smallest_number_in_n_byte_varint(1), - largest_number_in_n_byte_varint(1) + 1, - ), - Range::new( - smallest_number_in_n_byte_varint(2), - largest_number_in_n_byte_varint(2) + 1, - ), - Range::new( - smallest_number_in_n_byte_varint(3), - largest_number_in_n_byte_varint(3) + 1, - ), - Range::new( - smallest_number_in_n_byte_varint(4), - largest_number_in_n_byte_varint(4) + 1, - ), - Range::new( - smallest_number_in_n_byte_varint(5), - largest_number_in_n_byte_varint(5) + 1, - ), - Range::new( - smallest_number_in_n_byte_varint(6), - largest_number_in_n_byte_varint(6) + 1, - ), - Range::new( - smallest_number_in_n_byte_varint(7), - largest_number_in_n_byte_varint(7) + 1, - ), - Range::new( - smallest_number_in_n_byte_varint(8), - largest_number_in_n_byte_varint(8) + 1, - ), - Range::new( - smallest_number_in_n_byte_varint(9), - largest_number_in_n_byte_varint(9), - ), - ], - range_for_picking_range: Range::new(0, 9), - rng: rng, - } - } -} - -impl Iterator for RandomVarintEncodedLengthIter { - type Item = u64; - - fn next(&mut self) -> Option { - // pick the range we'll use - let value_range = self.ranges[self.range_for_picking_range.ind_sample(&mut self.rng)]; - - Some(value_range.ind_sample(&mut self.rng)) - } -} - -include!("test_serialize_trait.rs"); diff --git a/src/serialization/v2_deflate_serializer.rs b/src/serialization/v2_deflate_serializer.rs index 508de4f..beb5183 100644 --- a/src/serialization/v2_deflate_serializer.rs +++ b/src/serialization/v2_deflate_serializer.rs @@ -1,6 +1,6 @@ use super::super::Histogram; use core::counter::Counter; -use super::V2_COMPRESSED_COOKIE; +use super::{Serializer, V2_COMPRESSED_COOKIE}; use super::v2_serializer::{V2SerializeError, V2Serializer}; use 
super::byteorder::{BigEndian, WriteBytesExt}; use super::flate2::Compression; @@ -48,12 +48,12 @@ impl V2DeflateSerializer { v2_serializer: V2Serializer::new(), } } +} + +impl Serializer for V2DeflateSerializer { + type SerializeError = V2DeflateSerializeError; - /// Serialize the histogram into the provided writer. - /// Returns the number of bytes written, or an error. - /// - /// Note that `Vec` is a reasonable `Write` implementation for simple usage. - pub fn serialize( + fn serialize( &mut self, h: &Histogram, writer: &mut W, diff --git a/src/serialization/v2_serializer.rs b/src/serialization/v2_serializer.rs index fb61007..f384ba1 100644 --- a/src/serialization/v2_serializer.rs +++ b/src/serialization/v2_serializer.rs @@ -1,4 +1,4 @@ -use super::{V2_COOKIE, V2_HEADER_SIZE}; +use super::{Serializer, V2_COOKIE, V2_HEADER_SIZE}; use super::super::{Counter, Histogram}; use std::io::{ErrorKind, Write}; use std; @@ -38,12 +38,12 @@ impl V2Serializer { pub fn new() -> V2Serializer { V2Serializer { buf: Vec::new() } } +} + +impl Serializer for V2Serializer { + type SerializeError = V2SerializeError; - /// Serialize the histogram into the provided writer. - /// Returns the number of bytes written, or an error. - /// - /// Note that `Vec` is a reasonable `Write` implementation for simple usage. - pub fn serialize( + fn serialize( &mut self, h: &Histogram, writer: &mut W, @@ -149,7 +149,9 @@ pub fn encode_counts( // serializing. Don't want to silently eat counts beyond i63 max when serializing. // Perhaps we should provide some sort of pluggability here -- choose whether you want // to truncate counts to i63 max, or report errors if you need maximum fidelity? - count.to_i64().ok_or(V2SerializeError::CountNotSerializable)? + count + .to_i64() + .ok_or(V2SerializeError::CountNotSerializable)? 
}; let zz = zig_zag_encode(count_or_zeros); diff --git a/tests/data/tagged-Log.logV2.hlog b/tests/data/tagged-Log.logV2.hlog new file mode 100644 index 0000000..520f41d --- /dev/null +++ b/tests/data/tagged-Log.logV2.hlog @@ -0,0 +1,46 @@ +#[Logged with jHiccup version 2.0.7-SNAPSHOT, manually edited to duplicate contents with Tag=A] +#[Histogram log format version 1.2] +#[StartTime: 1441812279.474 (seconds since epoch), Wed Sep 09 08:24:39 PDT 2015] +"StartTimestamp","Interval_Length","Interval_Max","Interval_Compressed_Histogram" +0.127,1.007,2.769,HISTFAAAAEV42pNpmSzMwMCgyAABTBDKT4GBgdnNYMcCBvsPEBEJISEuATEZMQ4uASkhIR4nrxg9v2lMaxhvMekILGZkKmcCAEf2CsI= +Tag=A,0.127,1.007,2.769,HISTFAAAAEV42pNpmSzMwMCgyAABTBDKT4GBgdnNYMcCBvsPEBEJISEuATEZMQ4uASkhIR4nrxg9v2lMaxhvMekILGZkKmcCAEf2CsI= +1.134,0.999,0.442,HISTFAAAAEJ42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBEWLj45FTExAT4pBSEBKa6UkAgBi1uM7xjfMMlwMDABAC0CCjM= +Tag=A,1.134,0.999,0.442,HISTFAAAAEJ42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBEWLj45FTExAT4pBSEBKa6UkAgBi1uM7xjfMMlwMDABAC0CCjM= +2.133,1.001,0.426,HISTFAAAAD942pNpmSzMwMAgwwABTBDKT4GBgdnNYMcCBvsPEBE+Ph4OLgk5OSkeIS4+LgEeswIDo1+MbmdYNASYAA51CSo= +Tag=A,2.133,1.001,0.426,HISTFAAAAD942pNpmSzMwMAgwwABTBDKT4GBgdnNYMcCBvsPEBE+Ph4OLgk5OSkeIS4+LgEeswIDo1+MbmdYNASYAA51CSo= +3.134,1.001,0.426,HISTFAAAAD942pNpmSzMwMAgwwABTBDKT4GBgdnNYMcCBvsPEBExPiEpITEFGTkRKSEeOR6FkCg1hTeMXvNYlHhYABQ5CTo= +Tag=A,3.134,1.001,0.426,HISTFAAAAD942pNpmSzMwMAgwwABTBDKT4GBgdnNYMcCBvsPEBExPiEpITEFGTkRKSEeOR6FkCg1hTeMXvNYlHhYABQ5CTo= +4.135,0.997,0.426,HISTFAAAAD942pNpmSzMwMAgwwABTBDKT4GBgdnNYMcCBvsPEBE2PiERBREpBREhER4+Hj4uvQAdrTlMBldYDDhYAAugCKk= +Tag=A,4.135,0.997,0.426,HISTFAAAAD942pNpmSzMwMAgwwABTBDKT4GBgdnNYMcCBvsPEBE2PiERBREpBREhER4+Hj4uvQAdrTlMBldYDDhYAAugCKk= +5.132,1.002,0.426,HISTFAAAAEF42pNpmSzMwMAgywABTBDKT4GBgdnNYMcCBvsPEBEWPhElOR4pARUpKTkpGQkxq2mMegZnGI0+MZuIcAEAHo8Jvw== +Tag=A,5.132,1.002,0.426,HISTFAAAAEF42pNpmSzMwMAgywABTBDKT4GBgdnNYMcCBvsPEBEWPhElOR4pARUpKTkpGQkxq2mMegZnGI0+MZuIcAEAHo8Jvw== 
+6.134,0.999,0.442,HISTFAAAAEF42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBEWIS4FITEhDiEJERE+GT6ZkhZGLbl7jEqrWHREmFgAIbAJMw== +Tag=A,6.134,0.999,0.442,HISTFAAAAEF42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBEWIS4FITEhDiEJERE+GT6ZkhZGLbl7jEqrWHREmFgAIbAJMw== +7.133,0.999,0.459,HISTFAAAAEJ42pNpmSzMwMCgwAABTBDKD8hndjPYsYDB/gNEhEtMQEBBTk5MQERCRkBEQEWlh9FJbg9jE+MS5ig1LhYmADkkCcE= +Tag=A,7.133,0.999,0.459,HISTFAAAAEJ42pNpmSzMwMCgwAABTBDKD8hndjPYsYDB/gNEhEtMQEBBTk5MQERCRkBEQEWlh9FJbg9jE+MS5ig1LhYmADkkCcE= +8.132,1.000,0.459,HISTFAAAAEB42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBEWIREgEOIQEuGT4xHg41Oo0pIqu8LYwVImwMfGBAAfkgkw +Tag=A,8.132,1.000,0.459,HISTFAAAAEB42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBEWIREgEOIQEuGT4xHg41Oo0pIqu8LYwVImwMfGBAAfkgkw +9.132,1.751,1551.892,HISTFAAAAJZ42pNpmSzMwMB0nQECmCCUnwIDA7ObwY4FDPYfYDJMXFxsbGwMbBwszDwsDDxsHFw6RWJMLJMZmcqBMJrJmskSiA2ZZJmkgRBCgmheIORGI1H5rEzMQAyDzFhY2EWRWUwMWCBxQtQQhAIWJiyAaEHyFbKwsLHAADYWAWmiFeKS5gACLsIEzdQICAgBIQShEfhFABXDF+M= +Tag=A,9.132,1.751,1551.892,HISTFAAAAJZ42pNpmSzMwMB0nQECmCCUnwIDA7ObwY4FDPYfYDJMXFxsbGwMbBwszDwsDDxsHFw6RWJMLJMZmcqBMJrJmskSiA2ZZJmkgRBCgmheIORGI1H5rEzMQAyDzFhY2EWRWUwMWCBxQtQQhAIWJiyAaEHyFbKwsLHAADYWAWmiFeKS5gACLsIEzdQICAgBIQShEfhFABXDF+M= +10.883,0.250,0.426,HISTFAAAAD142pNpmSzMwMAgxQABTBDKT4GBgdnNYMcCBvsPEBEeFi4mPg4WLhY2BjY2FhYOBSkpASEtoRA+NgDkCQZR +Tag=A,10.883,0.250,0.426,HISTFAAAAD142pNpmSzMwMAgxQABTBDKT4GBgdnNYMcCBvsPEBEeFi4mPg4WLhY2BjY2FhYOBSkpASEtoRA+NgDkCQZR +11.133,1.003,0.524,HISTFAAAAER42pNpmSzMwMCgyAABTBDKT4GBgdnNYMcCBvsPUBk2HgkZKREpEQUeGSEBAQ6xSYxhCnp7GJ02sWgJsbCwMgEAO0AJSQ== +Tag=A,11.133,1.003,0.524,HISTFAAAAER42pNpmSzMwMCgyAABTBDKT4GBgdnNYMcCBvsPUBk2HgkZKREpEQUeGSEBAQ6xSYxhCnp7GJ02sWgJsbCwMgEAO0AJSQ== +12.136,0.997,0.459,HISTFAAAAEB42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPUBk2AT4eCQURHgkuEREOHjERlSQhhWuMSV9Y7ERYWAAa4gko +Tag=A,12.136,0.997,0.459,HISTFAAAAEB42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPUBk2AT4eCQURHgkuEREOHjERlSQhhWuMSV9Y7ERYWAAa4gko 
+13.133,0.998,0.459,HISTFAAAAD942pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPMBkRIR4RMRk5KQE+PgEhMRmzEjWZJ4whW1hMBNiYAB42CTA= +Tag=A,13.133,0.998,0.459,HISTFAAAAD942pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPMBkRIR4RMRk5KQE+PgEhMRmzEjWZJ4whW1hMBNiYAB42CTA= +14.131,1.000,0.492,HISTFAAAAEN42pNpmSzMwMCgyAABTBDKT4GBgdnNYMcCBvsPUBkWFhE5GT4FKQkRCR4ZCREpqwmMBhpHGG16WHx42JgYmAA6swk+ +Tag=A,14.131,1.000,0.492,HISTFAAAAEN42pNpmSzMwMCgyAABTBDKT4GBgdnNYMcCBvsPUBkWFhE5GT4FKQkRCR4ZCREpqwmMBhpHGG16WHx42JgYmAA6swk+ +15.131,1.001,0.442,HISTFAAAAD542pNpmSzMwMAgywABTBDKT4GBgdnNYMcCBvsPMBkuMTEFHgklFRkRATkJERGdKgudfYwRTSwGalwAF2IJOw== +Tag=A,15.131,1.001,0.442,HISTFAAAAD542pNpmSzMwMAgywABTBDKT4GBgdnNYMcCBvsPMBkuMTEFHgklFRkRATkJERGdKgudfYwRTSwGalwAF2IJOw== +16.132,1.001,0.524,HISTFAAAAEZ42pNpmSzMwMCgxAABTBDKT4GBgdnNYMcCBvsPEBE2IQEFCQkpGREpHj4hKS6NU4z7GDMkuBoYDSYw2wiwMLEyAQBQ3wne +Tag=A,16.132,1.001,0.524,HISTFAAAAEZ42pNpmSzMwMCgxAABTBDKT4GBgdnNYMcCBvsPEBE2IQEFCQkpGREpHj4hKS6NU4z7GDMkuBoYDSYw2wiwMLEyAQBQ3wne +17.133,0.998,0.459,HISTFAAAAEB42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPUBk2DjElIR4RHiExKQE5IT61iCodtXWMdn0sKVJMTAAekAk0 +Tag=A,17.133,0.998,0.459,HISTFAAAAEB42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPUBk2DjElIR4RHiExKQE5IT61iCodtXWMdn0sKVJMTAAekAk0 +18.131,1.000,0.459,HISTFAAAAEF42pNpmSzMwMAgzwABTBDKT4GBgdnNYMcCBvsPUBkWISERJSUJESklHhEJEREhqwZGLakPjDZdLBYCHCwAKOkJPg== +Tag=A,18.131,1.000,0.459,HISTFAAAAEF42pNpmSzMwMAgzwABTBDKT4GBgdnNYMcCBvsPUBkWISERJSUJESklHhEJEREhqwZGLakPjDZdLBYCHCwAKOkJPg== +19.131,1.000,0.475,HISTFAAAAEF42pNpmSzMwMAgzwABTBDKT4GBgdnNYMcCBvsPUAk2HjkJBSk+Pi4BMT4xIQE9pxIluTOMPhtYbITY2JgAKLoJOQ== +Tag=A,19.131,1.000,0.475,HISTFAAAAEF42pNpmSzMwMAgzwABTBDKT4GBgdnNYMcCBvsPUAk2HjkJBSk+Pi4BMT4xIQE9pxIluTOMPhtYbITY2JgAKLoJOQ== +20.131,1.004,0.475,HISTFAAAAEF42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBFmPhEJOSEhDi4+ETEeASEhswIVi1+MFjtYvCRYGJgAIP8JNw== +Tag=A,20.131,1.004,0.475,HISTFAAAAEF42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBFmPhEJOSEhDi4+ETEeASEhswIVi1+MFjtYvCRYGJgAIP8JNw== diff --git 
a/tests/data_access.rs b/tests/data_access.rs index 6523726..3ea9e19 100644 --- a/tests/data_access.rs +++ b/tests/data_access.rs @@ -517,7 +517,6 @@ fn linear_iter_steps() { histogram += 1; histogram += 64; histogram += 128; - println!("{:?}", histogram.iter_linear(64).collect::>()); assert_eq!(histogram.iter_linear(64).count(), 4); } diff --git a/tests/histogram.rs b/tests/histogram.rs index c8fcc96..42096ee 100644 --- a/tests/histogram.rs +++ b/tests/histogram.rs @@ -535,27 +535,6 @@ fn total_count_overflow_from_add_with_resize_saturates() { assert_eq!(u64::max_value(), h.len()); } -#[test] -#[cfg(feature = "serialization")] -fn total_count_overflow_from_deserialize_saturates() { - use hdrsample::serialization::{Deserializer, V2Serializer}; - let mut h = Histogram::::new_with_bounds(1, u64::max_value(), 3).unwrap(); - - // can't go bigger than i64 max because it will be serialized - h.record_n(1, i64::max_value() as u64).unwrap(); - h.record_n(1000, i64::max_value() as u64).unwrap(); - h.record_n(1_000_000, i64::max_value() as u64).unwrap(); - assert_eq!(u64::max_value(), h.len()); - - let mut vec = Vec::new(); - - V2Serializer::new().serialize(&h, &mut vec).unwrap(); - let deser_h: Histogram = Deserializer::new() - .deserialize(&mut vec.as_slice()) - .unwrap(); - assert_eq!(u64::max_value(), deser_h.len()); -} - #[test] fn subtract_underflow_guarded_by_per_value_count_check() { let mut h = Histogram::::new_with_bounds(1, u64::max_value(), 3).unwrap(); diff --git a/tests/interval_log.rs b/tests/interval_log.rs new file mode 100644 index 0000000..e59c9e9 --- /dev/null +++ b/tests/interval_log.rs @@ -0,0 +1,355 @@ +#[cfg(all(feature = "serialization", test))] +mod tests { + extern crate base64; + extern crate hdrsample; + extern crate rand; + + use self::rand::Rng; + + use self::hdrsample::Histogram; + use self::hdrsample::serialization::{Deserializer, Serializer, V2Serializer}; + use self::hdrsample::serialization::interval_log::{IntervalLogHistogram, 
IntervalLogIterator, + IntervalLogWriterBuilder, LogEntry, + LogIteratorError, Tag}; + + use std::{io, iter, str, time}; + use std::io::{BufRead, Read}; + use std::fs::File; + use std::path::Path; + + #[test] + fn parse_sample_tagged_interval_log_start_timestamp() { + let data = load_iterator_from_file(Path::new("tests/data/tagged-Log.logV2.hlog")); + let start_count = data.into_iter() + .map(|r| r.unwrap()) + .filter_map(|e| match e { + LogEntry::StartTime(t) => Some(t), + _ => None, + }) + .count(); + assert_eq!(1, start_count); + } + + #[test] + fn parse_sample_tagged_interval_log_interval_count() { + let data = load_iterator_from_file(Path::new("tests/data/tagged-Log.logV2.hlog")); + let intervals = data.into_iter() + .map(|r| r.unwrap()) + .filter_map(|e| match e { + LogEntry::Interval(ilh) => Some(ilh), + _ => None, + }) + .collect::>(); + + assert_eq!(42, intervals.len()); + + // half have tags, half do not + assert_eq!( + 21, + intervals.iter().filter(|ilh| ilh.tag().is_none()).count() + ); + assert_eq!( + 21, + intervals.iter().filter(|ilh| !ilh.tag().is_none()).count() + ); + } + + #[test] + fn parse_sample_tagged_interval_log_interval_metadata() { + let data = load_iterator_from_file(Path::new("tests/data/tagged-Log.logV2.hlog")); + let intervals = data.into_iter() + .map(|r| r.unwrap()) + .filter_map(|e| match e { + LogEntry::Interval(ilh) => Some(ilh), + _ => None, + }) + .collect::>(); + + let expected = vec![ + (0.127, 1.007, 2.769), + (1.134, 0.999, 0.442), + (2.133, 1.001, 0.426), + (3.134, 1.001, 0.426), + (4.135, 0.997, 0.426), + (5.132, 1.002, 0.426), + (6.134, 0.999, 0.442), + (7.133, 0.999, 0.459), + (8.132, 1.0, 0.459), + (9.132, 1.751, 1551.892), + (10.883, 0.25, 0.426), + (11.133, 1.003, 0.524), + (12.136, 0.997, 0.459), + (13.133, 0.998, 0.459), + (14.131, 1.0, 0.492), + (15.131, 1.001, 0.442), + (16.132, 1.001, 0.524), + (17.133, 0.998, 0.459), + (18.131, 1.0, 0.459), + (19.131, 1.0, 0.475), + (20.131, 1.004, 0.475), + ]; + + // tagged 
and un-tagged are identical + + assert_eq!( + expected, + intervals + .iter() + .filter(|ilh| ilh.tag().is_none()) + .map(|ilh| { + ( + round(duration_as_fp_seconds(ilh.start_timestamp())), + round(duration_as_fp_seconds(ilh.duration())), + ilh.max(), + ) + }) + .collect::>() + ); + + assert_eq!( + expected, + intervals + .iter() + .filter(|ilh| !ilh.tag().is_none()) + .map(|ilh| { + ( + round(duration_as_fp_seconds(ilh.start_timestamp())), + round(duration_as_fp_seconds(ilh.duration())), + ilh.max(), + ) + }) + .collect::>() + ); + + let mut deserializer = Deserializer::new(); + for ilh in intervals { + let serialized_histogram = + base64::decode_config(ilh.encoded_histogram(), base64::STANDARD).unwrap(); + let decoded_hist: Histogram = deserializer + .deserialize(&mut io::Cursor::new(&serialized_histogram)) + .unwrap(); + + // this log happened to use 1000000 as the scaling factor. It was also formatted to 3 + // decimal places. + assert_eq!(round(decoded_hist.max() as f64 / 1_000_000_f64), ilh.max()); + } + } + + #[test] + fn parse_sample_tagged_interval_log_rewrite_identical() { + // trim off the comments and legend line + let reader = + io::BufReader::new(File::open(Path::new("tests/data/tagged-Log.logV2.hlog")).unwrap()); + + // the sample log uses DEFLATE compressed histograms, which we can't match exactly, so the + // best we can do is to re-serialize each one as uncompressed. 
+ + let mut serializer = V2Serializer::new(); + let mut deserializer = Deserializer::new(); + + let mut serialize_buf = Vec::new(); + let mut log_without_headers = Vec::new(); + reader + .lines() + .skip(4) + .map(|r| r.unwrap()) + .for_each(|mut line| { + let hist_index = line.rfind("HISTF").unwrap(); + let serialized = + base64::decode_config(&line[hist_index..], base64::STANDARD).unwrap(); + + let decoded_hist: Histogram = deserializer + .deserialize(&mut io::Cursor::new(serialized)) + .unwrap(); + + serialize_buf.clear(); + serializer + .serialize(&decoded_hist, &mut serialize_buf) + .unwrap(); + + // replace the deflate histogram with the predictable non-deflate one + line.truncate(hist_index); + line.push_str(&base64::encode_config(&serialize_buf, base64::STANDARD)); + + log_without_headers.extend_from_slice(line.as_bytes()); + log_without_headers.extend_from_slice("\n".as_bytes()); + }); + + let mut duplicate_log = Vec::new(); + + { + let mut writer = IntervalLogWriterBuilder::new() + .with_max_value_divisor(1_000_000.0) + .begin_log_with(&mut duplicate_log, &mut serializer) + .unwrap(); + + IntervalLogIterator::new(&log_without_headers) + .map(|r| r.unwrap()) + .filter_map(|e| match e { + LogEntry::Interval(ilh) => Some(ilh), + _ => None, + }) + .for_each(|ilh| { + let serialized_histogram = + base64::decode_config(ilh.encoded_histogram(), base64::STANDARD).unwrap(); + let decoded_hist: Histogram = deserializer + .deserialize(&mut io::Cursor::new(&serialized_histogram)) + .unwrap(); + + writer + .write_histogram( + &decoded_hist, + ilh.start_timestamp(), + ilh.duration(), + ilh.tag(), + ) + .unwrap(); + }); + } + + + let orig_str = str::from_utf8(&log_without_headers).unwrap(); + let rewritten_str = str::from_utf8(&duplicate_log) + .unwrap() + .lines() + // remove our #[MaxValueDivisor] comment + .filter(|l| !l.starts_with("#[MaxValueDivisor: ")) + // put newlines back in + .flat_map(|l| iter::once(l).chain(iter::once("\n"))) + .collect::(); + + + 
assert_eq!(orig_str, rewritten_str); + } + + #[test] + fn write_random_histograms_to_interval_log_then_read() { + let mut rng = rand::thread_rng(); + + let mut histograms = Vec::new(); + let mut tags = Vec::new(); + + let mut log_buf = Vec::new(); + let mut serializer = V2Serializer::new(); + + let max_scaling_factor = 1_000_000.0; + + { + let mut writer = IntervalLogWriterBuilder::new() + .with_max_value_divisor(max_scaling_factor) + .begin_log_with(&mut log_buf, &mut serializer) + .unwrap(); + + writer.write_comment("start").unwrap(); + + for i in 0_u32..100 { + let mut h = Histogram::::new_with_bounds(1, u64::max_value(), 3).unwrap(); + + for _ in 0..100 { + // ensure no count above i64::max_value(), even when many large values are + // bucketed together + h.record_n(rng.gen::() >> 32, rng.gen::() >> 32) + .unwrap(); + } + + if rng.gen() { + tags.push(Some(format!("t{}", i))); + } else { + tags.push(None); + }; + let current_tag_str = tags.last().unwrap(); + let tag = current_tag_str + .as_ref() + .map(|s| Tag::new(s.as_str()).unwrap()); + + writer + .write_histogram( + &h, + time::Duration::from_secs(i as u64), + time::Duration::new(10_000 + i as u64, 0), + tag, + ) + .unwrap(); + + writer.write_comment(&format!("line {}", i)).unwrap(); + + histograms.push(h); + } + } + + let parsed = IntervalLogIterator::new(&log_buf) + .filter_map(|e| match e { + Ok(LogEntry::Interval(ilh)) => Some(ilh), + _ => None, + }) + .collect::>(); + + assert_eq!(histograms.len(), parsed.len()); + + let mut deserializer = Deserializer::new(); + for (index, ilh) in parsed.iter().enumerate() { + let serialized_histogram = + base64::decode_config(ilh.encoded_histogram(), base64::STANDARD).unwrap(); + let decoded_hist: Histogram = deserializer + .deserialize(&mut io::Cursor::new(&serialized_histogram)) + .unwrap(); + let original_hist = &histograms[index]; + + assert_eq!(original_hist, &decoded_hist); + + assert_eq!(index as u64, ilh.start_timestamp().as_secs()); + assert_eq!( + 
time::Duration::new(10_000 + index as u64, 0), + ilh.duration() + ); + assert_eq!( + round(original_hist.max() as f64 / max_scaling_factor), + ilh.max() + ); + let tag_string: Option = tags.get(index).unwrap().as_ref().map(|s| s.clone()); + assert_eq!(tag_string, ilh.tag().map(|t| t.as_str().to_owned())); + } + } + + #[test] + fn parse_interval_log_syntax_error_then_returns_none() { + let log = "#Foo\nBar\n".as_bytes(); + + let mut iter = IntervalLogIterator::new(&log); + + assert_eq!( + Some(Err(LogIteratorError::ParseError { offset: 5 })), + iter.next() + ); + assert_eq!(None, iter.next()); + } + + /// Round to 3 digits the way floats are in the log + fn round(f: f64) -> f64 { + format!("{:.3}", f).parse::().unwrap() + } + + fn duration_as_fp_seconds(d: time::Duration) -> f64 { + d.as_secs() as f64 + d.subsec_nanos() as f64 / 1_000_000_000_f64 + } + + fn load_iterator_from_file<'a>(path: &Path) -> IntervalLogBufHolder { + let mut buf = Vec::new(); + let _ = File::open(path).unwrap().read_to_end(&mut buf).unwrap(); + + IntervalLogBufHolder { data: buf } + } + + struct IntervalLogBufHolder { + data: Vec, + } + + impl<'a> IntoIterator for &'a IntervalLogBufHolder { + type Item = Result, LogIteratorError>; + type IntoIter = IntervalLogIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + IntervalLogIterator::new(self.data.as_slice()) + } + } +} diff --git a/tests/serialization.rs b/tests/serialization.rs index 87c8d68..3f6b8f4 100644 --- a/tests/serialization.rs +++ b/tests/serialization.rs @@ -3,7 +3,7 @@ mod tests { extern crate hdrsample; use self::hdrsample::Histogram; - use self::hdrsample::serialization::{Deserializer, V2Serializer}; + use self::hdrsample::serialization::{Deserializer, Serializer, V2Serializer}; use std::io::{BufRead, BufReader, Read}; use std::fs::File; @@ -50,6 +50,25 @@ mod tests { assert_eq!(h, deser_h); } + #[test] + fn total_count_overflow_from_deserialize_saturates() { + let mut h = Histogram::::new_with_bounds(1, u64::max_value(), 
3).unwrap(); + + // can't go bigger than i64 max because it will be serialized + h.record_n(1, i64::max_value() as u64).unwrap(); + h.record_n(1000, i64::max_value() as u64).unwrap(); + h.record_n(1000_000, i64::max_value() as u64).unwrap(); + assert_eq!(u64::max_value(), h.len()); + + let mut vec = Vec::new(); + + V2Serializer::new().serialize(&h, &mut vec).unwrap(); + let deser_h: Histogram = Deserializer::new() + .deserialize(&mut vec.as_slice()) + .unwrap(); + assert_eq!(u64::max_value(), deser_h.len()); + } + fn load_histogram_from_num_per_line(path: &Path) -> Histogram { // max is Java's Long.MAX_VALUE let mut h: Histogram =