@@ -515,16 +515,15 @@ fn do_serialize_roundtrip_random<T>(max_count: T)
515
515
let mut s = V2Serializer :: new ( ) ;
516
516
let mut d = Deserializer :: new ( ) ;
517
517
let mut vec = Vec :: new ( ) ;
518
- let mut rng = rand:: weak_rng ( ) ;
518
+ let mut count_rng = rand:: weak_rng ( ) ;
519
519
520
520
let range = Range :: < T > :: new ( T :: one ( ) , max_count) ;
521
521
for _ in 0 ..100 {
522
522
vec. clear ( ) ;
523
523
let mut h = Histogram :: < T > :: new_with_bounds ( 1 , u64:: max_value ( ) , 3 ) . unwrap ( ) ;
524
524
525
- for _ in 0 ..1000 {
526
- let count = range. ind_sample ( & mut rng) ;
527
- let value = rng. gen ( ) ;
525
+ for value in RandomVarintEncodedLengthIter :: new ( rand:: weak_rng ( ) ) . take ( 1000 ) {
526
+ let count = range. ind_sample ( & mut count_rng) ;
528
527
// don't let accumulated per-value count exceed max_count
529
528
let existing_count = h. count_at ( value) . unwrap ( ) ;
530
529
let sum = existing_count. saturating_add ( count) ;
@@ -633,3 +632,45 @@ impl<T: SampleRange, R: Rng> Iterator for RandomRangeIter<T, R> {
633
632
Some ( self . range . ind_sample ( & mut self . rng ) )
634
633
}
635
634
}
635
+
636
+ // Evenly distributed random numbers end up biased heavily towards longer encoded byte lengths:
637
+ // there are a lot more large numbers than there are small (duh), but for exercising serialization
638
+ // code paths, we'd like many at all byte lengths. This is also arguably more representative of
639
+ // real data. This should emit values whose varint lengths are uniformly distributed across the
640
+ // whole length range (1 to 9).
641
+ struct RandomVarintEncodedLengthIter < R : Rng > {
642
+ ranges : [ Range < u64 > ; 9 ] ,
643
+ range_for_picking_range : Range < usize > ,
644
+ rng : R
645
+ }
646
+
647
+ impl < R : Rng > RandomVarintEncodedLengthIter < R > {
648
+ fn new ( rng : R ) -> RandomVarintEncodedLengthIter < R > {
649
+ RandomVarintEncodedLengthIter {
650
+ ranges : [
651
+ Range :: new ( smallest_number_in_n_byte_varint ( 1 ) , largest_number_in_n_byte_varint ( 1 ) + 1 ) ,
652
+ Range :: new ( smallest_number_in_n_byte_varint ( 2 ) , largest_number_in_n_byte_varint ( 2 ) + 1 ) ,
653
+ Range :: new ( smallest_number_in_n_byte_varint ( 3 ) , largest_number_in_n_byte_varint ( 3 ) + 1 ) ,
654
+ Range :: new ( smallest_number_in_n_byte_varint ( 4 ) , largest_number_in_n_byte_varint ( 4 ) + 1 ) ,
655
+ Range :: new ( smallest_number_in_n_byte_varint ( 5 ) , largest_number_in_n_byte_varint ( 5 ) + 1 ) ,
656
+ Range :: new ( smallest_number_in_n_byte_varint ( 6 ) , largest_number_in_n_byte_varint ( 6 ) + 1 ) ,
657
+ Range :: new ( smallest_number_in_n_byte_varint ( 7 ) , largest_number_in_n_byte_varint ( 7 ) + 1 ) ,
658
+ Range :: new ( smallest_number_in_n_byte_varint ( 8 ) , largest_number_in_n_byte_varint ( 8 ) + 1 ) ,
659
+ Range :: new ( smallest_number_in_n_byte_varint ( 9 ) , largest_number_in_n_byte_varint ( 9 ) ) ,
660
+ ] ,
661
+ range_for_picking_range : Range :: new ( 0 , 9 ) ,
662
+ rng : rng
663
+ }
664
+ }
665
+ }
666
+
667
+ impl < R : Rng > Iterator for RandomVarintEncodedLengthIter < R > {
668
+ type Item = u64 ;
669
+
670
+ fn next ( & mut self ) -> Option < Self :: Item > {
671
+ // pick the range we'll use
672
+ let value_range = self . ranges [ self . range_for_picking_range . ind_sample ( & mut self . rng ) ] ;
673
+
674
+ Some ( value_range. ind_sample ( & mut self . rng ) )
675
+ }
676
+ }
0 commit comments