Skip to content

Commit 246d6a9

Browse files
authored
Merge pull request #42 from marshallpierce/deser-dont-set-min-value-for-zero-count
Fix occasional test failures caused by incorrect min value tracking
2 parents ea9211b + e1857a2 commit 246d6a9

File tree

4 files changed

+55
-12
lines changed

4 files changed

+55
-12
lines changed

src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ const ORIGINAL_MAX: u64 = 0;
173173
/// of percentiles.
174174
pub trait Counter
175175
: num::Num + num::ToPrimitive + num::FromPrimitive + num::Saturating + num::CheckedSub
176-
+ num::CheckedAdd + Copy + PartialOrd<Self>{
176+
+ num::CheckedAdd + Copy + PartialOrd<Self> {
177177
}
178178

179179
// auto-implement marker trait

src/serialization/deserializer.rs

+7-6
Original file line numberDiff line numberDiff line change
@@ -240,12 +240,11 @@ pub fn zig_zag_decode(encoded: u64) -> i64 {
240240
/// We need to perform the same logic in two different decode loops while carrying over a modicum
241241
/// of state.
242242
struct DecodeLoopState<T: Counter> {
243-
dest_index:usize,
243+
dest_index: usize,
244244
phantom: PhantomData<T>
245245
}
246246

247-
impl <T: Counter> DecodeLoopState<T> {
248-
247+
impl<T: Counter> DecodeLoopState<T> {
249248
fn new() -> DecodeLoopState<T> {
250249
DecodeLoopState {
251250
dest_index: 0,
@@ -266,10 +265,12 @@ impl <T: Counter> DecodeLoopState<T> {
266265
let count: T = T::from_i64(count_or_zeros)
267266
.ok_or(DeserializeError::UnsuitableCounterType)?;
268267

269-
h.set_count_at_index(self.dest_index, count)
270-
.map_err(|_| DeserializeError::EncodedArrayTooLong)?;
268+
if count > T::zero() {
269+
h.set_count_at_index(self.dest_index, count)
270+
.map_err(|_| DeserializeError::EncodedArrayTooLong)?;
271271

272-
restat_state.on_nonzero_count(self.dest_index, count);
272+
restat_state.on_nonzero_count(self.dest_index, count);
273+
}
273274

274275
self.dest_index = self.dest_index.checked_add(1)
275276
.ok_or(DeserializeError::UsizeTypeTooSmall)?;

src/serialization/tests.rs

+45-4
Original file line numberDiff line numberDiff line change
@@ -515,16 +515,15 @@ fn do_serialize_roundtrip_random<T>(max_count: T)
515515
let mut s = V2Serializer::new();
516516
let mut d = Deserializer::new();
517517
let mut vec = Vec::new();
518-
let mut rng = rand::weak_rng();
518+
let mut count_rng = rand::weak_rng();
519519

520520
let range = Range::<T>::new(T::one(), max_count);
521521
for _ in 0..100 {
522522
vec.clear();
523523
let mut h = Histogram::<T>::new_with_bounds(1, u64::max_value(), 3).unwrap();
524524

525-
for _ in 0..1000 {
526-
let count = range.ind_sample(&mut rng);
527-
let value = rng.gen();
525+
for value in RandomVarintEncodedLengthIter::new(rand::weak_rng()).take(1000) {
526+
let count = range.ind_sample(&mut count_rng);
528527
// don't let accumulated per-value count exceed max_count
529528
let existing_count = h.count_at(value).unwrap();
530529
let sum = existing_count.saturating_add(count);
@@ -633,3 +632,45 @@ impl<T: SampleRange, R: Rng> Iterator for RandomRangeIter<T, R> {
633632
Some(self.range.ind_sample(&mut self.rng))
634633
}
635634
}
635+
636+
// Evenly distributed random numbers end up biased heavily towards longer encoded byte lengths:
637+
// there are a lot more large numbers than there are small (duh), but for exercising serialization
638+
// code paths, we'd like many at all byte lengths. This is also arguably more representative of
639+
// real data. This should emit values whose varint lengths are uniformly distributed across the
640+
// whole length range (1 to 9).
641+
struct RandomVarintEncodedLengthIter<R: Rng> {
642+
ranges: [Range<u64>; 9],
643+
range_for_picking_range: Range<usize>,
644+
rng: R
645+
}
646+
647+
impl<R: Rng> RandomVarintEncodedLengthIter<R> {
648+
fn new(rng: R) -> RandomVarintEncodedLengthIter<R> {
649+
RandomVarintEncodedLengthIter {
650+
ranges: [
651+
Range::new(smallest_number_in_n_byte_varint(1), largest_number_in_n_byte_varint(1) + 1),
652+
Range::new(smallest_number_in_n_byte_varint(2), largest_number_in_n_byte_varint(2) + 1),
653+
Range::new(smallest_number_in_n_byte_varint(3), largest_number_in_n_byte_varint(3) + 1),
654+
Range::new(smallest_number_in_n_byte_varint(4), largest_number_in_n_byte_varint(4) + 1),
655+
Range::new(smallest_number_in_n_byte_varint(5), largest_number_in_n_byte_varint(5) + 1),
656+
Range::new(smallest_number_in_n_byte_varint(6), largest_number_in_n_byte_varint(6) + 1),
657+
Range::new(smallest_number_in_n_byte_varint(7), largest_number_in_n_byte_varint(7) + 1),
658+
Range::new(smallest_number_in_n_byte_varint(8), largest_number_in_n_byte_varint(8) + 1),
659+
Range::new(smallest_number_in_n_byte_varint(9), largest_number_in_n_byte_varint(9)),
660+
],
661+
range_for_picking_range: Range::new(0, 9),
662+
rng: rng
663+
}
664+
}
665+
}
666+
667+
impl<R: Rng> Iterator for RandomVarintEncodedLengthIter<R> {
668+
type Item = u64;
669+
670+
fn next(&mut self) -> Option<Self::Item> {
671+
// pick the range we'll use
672+
let value_range = self.ranges[self.range_for_picking_range.ind_sample(&mut self.rng)];
673+
674+
Some(value_range.ind_sample(&mut self.rng))
675+
}
676+
}

src/serialization/v2_serializer.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,8 @@ pub fn encode_counts<T: Counter>(h: &Histogram<T>, buf: &mut [u8]) -> Result<usi
107107
let count = unsafe { *(h.counts.get_unchecked(index)) };
108108
index += 1;
109109

110-
// Non-negative values are positive counts or 1 zero, negative values are repeated zeros.
110+
// Non-negative values are counts for the respective value, negative values are skipping
111+
// that many (absolute value) zero-count values.
111112

112113
let mut zero_count = 0;
113114
if count == T::zero() {

0 commit comments

Comments
 (0)