Skip to content

Commit 70e8176

Browse files
committed
merge
1 parent a155e04 commit 70e8176

File tree

8 files changed

+507
-392
lines changed

8 files changed

+507
-392
lines changed

Cargo.lock

Lines changed: 452 additions & 340 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,8 @@ async fn test_median() {
233233
/// 1. Floating point numbers
234234
/// 1. structured types
235235
fn baseline_config() -> DatasetGeneratorConfig {
236-
let mut rng = thread_rng();
237-
let columns = get_supported_types_columns(rng.gen());
236+
let mut rng = rng();
237+
let columns = get_supported_types_columns(rng.random());
238238

239239
let min_num_rows = 512;
240240
let max_num_rows = 1024;
@@ -682,8 +682,8 @@ async fn test_single_mode_aggregate_with_spill() -> Result<()> {
682682
Arc::new(StringArray::from(
683683
(0..1024)
684684
.map(|_| -> String {
685-
thread_rng()
686-
.sample_iter::<char, _>(rand::distributions::Standard)
685+
rng()
686+
.sample_iter::<char, _>(rand::distr::StandardUniform)
687687
.take(5)
688688
.collect()
689689
})

datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -537,25 +537,25 @@ impl QueryBuilder {
537537
.cloned()
538538
.collect();
539539

540-
available_columns.shuffle(&mut thread_rng());
540+
available_columns.shuffle(&mut rng());
541541

542542
let num_of_order_by_col = 12;
543543
let column_count = std::cmp::min(num_of_order_by_col, available_columns.len());
544544

545545
let selected_columns = &available_columns[0..column_count];
546546

547-
let mut rng = thread_rng();
547+
let mut rng = rng();
548548
let mut result = String::from_str(" order by ").unwrap();
549549
for col in selected_columns {
550-
let order = if rng.gen_bool(0.5) { "ASC" } else { "DESC" };
550+
let order = if rng.random_bool(0.5) { "ASC" } else { "DESC" };
551551
result.push_str(&format!("{} {},", col, order));
552552
}
553553

554554
result.strip_suffix(",").unwrap().to_string()
555555
}
556556

557557
fn null_opt(&self) -> String {
558-
if thread_rng().gen_bool(0.5) {
558+
if rng().random_bool(0.5) {
559559
"RESPECT NULLS".to_string()
560560
} else {
561561
"IGNORE NULLS".to_string()

datafusion/core/tests/fuzz_cases/record_batch_generator.rs

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use arrow_schema::{
3232
DECIMAL256_MAX_SCALE,
3333
};
3434
use datafusion_common::{arrow_datafusion_err, DataFusionError, Result};
35-
use rand::{rngs::StdRng, thread_rng, Rng, SeedableRng};
35+
use rand::{rng, rngs::StdRng, Rng, SeedableRng};
3636
use test_utils::array_gen::{
3737
BinaryArrayGenerator, BooleanArrayGenerator, DecimalArrayGenerator,
3838
PrimitiveArrayGenerator, StringArrayGenerator,
@@ -86,15 +86,15 @@ pub fn get_supported_types_columns(rng_seed: u64) -> Vec<ColumnDescr> {
8686
DataType::Interval(IntervalUnit::MonthDayNano),
8787
),
8888
ColumnDescr::new("decimal128", {
89-
let precision: u8 = rng.gen_range(1..=DECIMAL128_MAX_PRECISION);
90-
let scale: i8 = rng.gen_range(
89+
let precision: u8 = rng.random_range(1..=DECIMAL128_MAX_PRECISION);
90+
let scale: i8 = rng.random_range(
9191
i8::MIN..=std::cmp::min(precision as i8, DECIMAL128_MAX_SCALE),
9292
);
9393
DataType::Decimal128(precision, scale)
9494
}),
9595
ColumnDescr::new("decimal256", {
96-
let precision: u8 = rng.gen_range(1..=DECIMAL256_MAX_PRECISION);
97-
let scale: i8 = rng.gen_range(
96+
let precision: u8 = rng.random_range(1..=DECIMAL256_MAX_PRECISION);
97+
let scale: i8 = rng.random_range(
9898
i8::MIN..=std::cmp::min(precision as i8, DECIMAL256_MAX_SCALE),
9999
);
100100
DataType::Decimal256(precision, scale)
@@ -161,14 +161,15 @@ pub struct RecordBatchGenerator {
161161

162162
/// If a seed is provided when constructing the generator, it will be used to
163163
/// create `rng` and the pseudo-randomly generated batches will be deterministic.
164-
/// Otherwise, `rng` will be initialized using `thread_rng()` and the batches
164+
/// Otherwise, `rng` will be initialized using `rng()` and the batches
165165
/// generated will be different each time.
166166
rng: StdRng,
167167
}
168168

169169
macro_rules! generate_decimal_array {
170170
($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT: expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $PRECISION: ident, $SCALE: ident, $ARROW_TYPE: ident) => {{
171-
let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len());
171+
let null_pct_idx =
172+
$BATCH_GEN_RNG.random_range(0..$SELF.candidate_null_pcts.len());
172173
let null_pct = $SELF.candidate_null_pcts[null_pct_idx];
173174

174175
let mut generator = DecimalArrayGenerator {
@@ -188,7 +189,8 @@ macro_rules! generate_decimal_array {
188189
macro_rules! generate_boolean_array {
189190
($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT:expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE: ident) => {{
190191
// Select a null percentage from the candidate percentages
191-
let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len());
192+
let null_pct_idx =
193+
$BATCH_GEN_RNG.random_range(0..$SELF.candidate_null_pcts.len());
192194
let null_pct = $SELF.candidate_null_pcts[null_pct_idx];
193195

194196
let num_distinct_booleans = if $MAX_NUM_DISTINCT >= 2 { 2 } else { 1 };
@@ -206,7 +208,8 @@ macro_rules! generate_boolean_array {
206208

207209
macro_rules! generate_primitive_array {
208210
($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT:expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE:ident) => {{
209-
let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len());
211+
let null_pct_idx =
212+
$BATCH_GEN_RNG.random_range(0..$SELF.candidate_null_pcts.len());
210213
let null_pct = $SELF.candidate_null_pcts[null_pct_idx];
211214

212215
let mut generator = PrimitiveArrayGenerator {
@@ -235,7 +238,7 @@ impl RecordBatchGenerator {
235238
max_rows_num,
236239
columns,
237240
candidate_null_pcts,
238-
rng: StdRng::from_rng(thread_rng()).unwrap(),
241+
rng: StdRng::from_rng(&mut rng()),
239242
}
240243
}
241244

@@ -247,9 +250,9 @@ impl RecordBatchGenerator {
247250
}
248251

249252
pub fn generate(&mut self) -> Result<RecordBatch> {
250-
let num_rows = self.rng.gen_range(self.min_rows_num..=self.max_rows_num);
251-
let array_gen_rng = StdRng::from_seed(self.rng.gen());
252-
let mut batch_gen_rng = StdRng::from_seed(self.rng.gen());
253+
let num_rows = self.rng.random_range(self.min_rows_num..=self.max_rows_num);
254+
let array_gen_rng = StdRng::from_seed(self.rng.random());
255+
let mut batch_gen_rng = StdRng::from_seed(self.rng.random());
253256
let columns = self.columns.clone();
254257

255258
// Build arrays
@@ -283,7 +286,7 @@ impl RecordBatchGenerator {
283286
array_gen_rng: StdRng,
284287
) -> ArrayRef {
285288
let num_distinct = if num_rows > 1 {
286-
batch_gen_rng.gen_range(1..num_rows)
289+
batch_gen_rng.random_range(1..num_rows)
287290
} else {
288291
num_rows
289292
};
@@ -526,9 +529,9 @@ impl RecordBatchGenerator {
526529
}
527530
DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
528531
let null_pct_idx =
529-
batch_gen_rng.gen_range(0..self.candidate_null_pcts.len());
532+
batch_gen_rng.random_range(0..self.candidate_null_pcts.len());
530533
let null_pct = self.candidate_null_pcts[null_pct_idx];
531-
let max_len = batch_gen_rng.gen_range(1..50);
534+
let max_len = batch_gen_rng.random_range(1..50);
532535

533536
let mut generator = StringArrayGenerator {
534537
max_len,
@@ -547,9 +550,9 @@ impl RecordBatchGenerator {
547550
}
548551
DataType::Binary | DataType::LargeBinary | DataType::BinaryView => {
549552
let null_pct_idx =
550-
batch_gen_rng.gen_range(0..self.candidate_null_pcts.len());
553+
batch_gen_rng.random_range(0..self.candidate_null_pcts.len());
551554
let null_pct = self.candidate_null_pcts[null_pct_idx];
552-
let max_len = batch_gen_rng.gen_range(1..100);
555+
let max_len = batch_gen_rng.random_range(1..100);
553556

554557
let mut generator = BinaryArrayGenerator {
555558
max_len,

datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ use datafusion_execution::memory_pool::{
3030
};
3131
use datafusion_expr::display_schema;
3232
use datafusion_physical_plan::spill::get_record_batch_memory_size;
33-
use rand::seq::SliceRandom;
3433
use std::time::Duration;
3534

3635
use datafusion_execution::{
3736
disk_manager::DiskManagerConfig, memory_pool::FairSpillPool,
3837
runtime_env::RuntimeEnvBuilder,
3938
};
39+
use rand::prelude::IndexedRandom;
4040
use rand::Rng;
4141
use rand::{rngs::StdRng, SeedableRng};
4242

@@ -199,16 +199,16 @@ impl SortQueryFuzzer {
199199
// Execute until either `max_rounds` or `time_limit` is reached
200200
let max_rounds = self.max_rounds.unwrap_or(usize::MAX);
201201
for round in 0..max_rounds {
202-
let init_seed = self.runner_rng.gen();
202+
let init_seed = self.runner_rng.random();
203203
for query_i in 0..self.queries_per_round {
204-
let query_seed = self.runner_rng.gen();
204+
let query_seed = self.runner_rng.random();
205205
let mut expected_results: Option<Vec<RecordBatch>> = None; // use first config's result as the expected result
206206
for config_i in 0..self.config_variations_per_query {
207207
if self.should_stop_due_to_time_limit(start_time, round, query_i) {
208208
return Ok(());
209209
}
210210

211-
let config_seed = self.runner_rng.gen();
211+
let config_seed = self.runner_rng.random();
212212

213213
println!(
214214
"[SortQueryFuzzer] Round {}, Query {} (Config {})",
@@ -300,7 +300,7 @@ impl SortFuzzerTestGenerator {
300300
let mut rng = StdRng::seed_from_u64(rng_seed);
301301
let min_ncol = min(candidate_columns.len(), 5);
302302
let max_ncol = min(candidate_columns.len(), 10);
303-
let amount = rng.gen_range(min_ncol..=max_ncol);
303+
let amount = rng.random_range(min_ncol..=max_ncol);
304304
let selected_columns = candidate_columns
305305
.choose_multiple(&mut rng, amount)
306306
.cloned()
@@ -327,7 +327,7 @@ impl SortFuzzerTestGenerator {
327327
/// memory table should be generated with more partitions, due to https://github.com/apache/datafusion/issues/15088
328328
fn init_partitioned_staggered_batches(&mut self, rng_seed: u64) {
329329
let mut rng = StdRng::seed_from_u64(rng_seed);
330-
let num_partitions = rng.gen_range(1..=self.max_partitions);
330+
let num_partitions = rng.random_range(1..=self.max_partitions);
331331

332332
let max_batch_size = self.num_rows / num_partitions / 50;
333333
let target_partition_size = self.num_rows / num_partitions;
@@ -344,7 +344,7 @@ impl SortFuzzerTestGenerator {
344344
// Generate a random batch of size between 1 and max_batch_size
345345

346346
// Make the edge case (1-row batch) more common
347-
let (min_nrow, max_nrow) = if rng.gen_bool(0.1) {
347+
let (min_nrow, max_nrow) = if rng.random_bool(0.1) {
348348
(1, 3)
349349
} else {
350350
(1, max_batch_size)
@@ -355,7 +355,7 @@ impl SortFuzzerTestGenerator {
355355
max_nrow,
356356
self.selected_columns.clone(),
357357
)
358-
.with_seed(rng.gen());
358+
.with_seed(rng.random());
359359

360360
let record_batch = record_batch_generator.generate().unwrap();
361361
num_rows += record_batch.num_rows();
@@ -373,9 +373,9 @@ impl SortFuzzerTestGenerator {
373373
}
374374

375375
// After all partitions are created, optionally make one partition have 0/1 batch
376-
if num_partitions > 2 && rng.gen_bool(0.1) {
377-
let partition_index = rng.gen_range(0..num_partitions);
378-
if rng.gen_bool(0.5) {
376+
if num_partitions > 2 && rng.random_bool(0.1) {
377+
let partition_index = rng.random_range(0..num_partitions);
378+
if rng.random_bool(0.5) {
379379
// 0 batch
380380
partitions[partition_index] = Vec::new();
381381
} else {
@@ -424,7 +424,7 @@ impl SortFuzzerTestGenerator {
424424
pub fn generate_random_query(&self, rng_seed: u64) -> (String, Option<usize>) {
425425
let mut rng = StdRng::seed_from_u64(rng_seed);
426426

427-
let num_columns = rng.gen_range(1..=3).min(self.selected_columns.len());
427+
let num_columns = rng.random_range(1..=3).min(self.selected_columns.len());
428428
let selected_columns: Vec<_> = self
429429
.selected_columns
430430
.choose_multiple(&mut rng, num_columns)
@@ -433,12 +433,12 @@ impl SortFuzzerTestGenerator {
433433
let mut order_by_clauses = Vec::new();
434434
for col in selected_columns {
435435
let mut clause = col.name.clone();
436-
if rng.gen_bool(0.5) {
437-
let order = if rng.gen_bool(0.5) { "ASC" } else { "DESC" };
436+
if rng.random_bool(0.5) {
437+
let order = if rng.random_bool(0.5) { "ASC" } else { "DESC" };
438438
clause.push_str(&format!(" {}", order));
439439
}
440-
if rng.gen_bool(0.5) {
441-
let nulls = if rng.gen_bool(0.5) {
440+
if rng.random_bool(0.5) {
441+
let nulls = if rng.random_bool(0.5) {
442442
"NULLS FIRST"
443443
} else {
444444
"NULLS LAST"
@@ -450,14 +450,14 @@ impl SortFuzzerTestGenerator {
450450

451451
let dataset_size = self.dataset_state.as_ref().unwrap().dataset_size;
452452

453-
let limit = if rng.gen_bool(0.2) {
453+
let limit = if rng.random_bool(0.2) {
454454
// Prefer edge cases for k like 1, dataset_size, etc.
455-
Some(if rng.gen_bool(0.5) {
455+
Some(if rng.random_bool(0.5) {
456456
let edge_cases =
457457
[1, 2, 3, dataset_size - 1, dataset_size, dataset_size + 1];
458458
*edge_cases.choose(&mut rng).unwrap()
459459
} else {
460-
rng.gen_range(1..=dataset_size)
460+
rng.random_range(1..=dataset_size)
461461
})
462462
} else {
463463
None
@@ -487,12 +487,12 @@ impl SortFuzzerTestGenerator {
487487

488488
// 50% to 200% of the dataset size (if `with_memory_limit` is false, config
489489
// will use the default unbounded pool to override it later)
490-
let memory_limit = rng.gen_range(
490+
let memory_limit = rng.random_range(
491491
(dataset_size as f64 * 0.5) as usize..=(dataset_size as f64 * 2.0) as usize,
492492
);
493493
// 20% to 30% of the per-partition memory limit size
494494
let per_partition_mem_limit = memory_limit / num_partitions;
495-
let sort_spill_reservation_bytes = rng.gen_range(
495+
let sort_spill_reservation_bytes = rng.random_range(
496496
(per_partition_mem_limit as f64 * 0.2) as usize
497497
..=(per_partition_mem_limit as f64 * 0.3) as usize,
498498
);
@@ -505,7 +505,7 @@ impl SortFuzzerTestGenerator {
505505
0
506506
} else {
507507
let dataset_size = self.dataset_state.as_ref().unwrap().dataset_size;
508-
rng.gen_range(0..=dataset_size * 2_usize)
508+
rng.random_range(0..=dataset_size * 2_usize)
509509
};
510510

511511
// Set up strings for printing

datafusion/core/tests/memory_limit/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ async fn oom_with_tracked_consumer_pool() {
396396
.with_expected_errors(vec![
397397
"Failed to allocate additional",
398398
"for ParquetSink(ArrowColumnWriter)",
399-
"Additional allocation failed with top memory consumers (across reservations) as: ParquetSink(SerializedFileWriter)"
399+
"Additional allocation failed with top memory consumers (across reservations) as: ParquetSink(ArrowColumnWriter)"
400400
])
401401
.with_memory_pool(Arc::new(
402402
TrackConsumersPool::new(

datafusion/wasmtest/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ datafusion-expr = { workspace = true }
5252
datafusion-optimizer = { workspace = true, default-features = true }
5353
datafusion-physical-plan = { workspace = true }
5454
datafusion-sql = { workspace = true }
55-
wasm-bindgen = "0.2.99"
5655
getrandom = { version = "0.3", features = ["wasm_js"] }
56+
wasm-bindgen = "0.2.99"
5757

5858
[patch.crates-io]
5959
# getrandom must be compiled with js feature

test-utils/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,9 @@ pub fn add_empty_batches(
6767
.flat_map(|batch| {
6868
// insert 0 or 1 empty batches before and after the current batch
6969
let empty_batch = RecordBatch::new_empty(schema.clone());
70-
std::iter::repeat_n(empty_batch.clone(), rng.gen_range(0..2))
70+
std::iter::repeat_n(empty_batch.clone(), rng.random_range(0..2))
7171
.chain(std::iter::once(batch))
72-
.chain(std::iter::repeat_n(empty_batch, rng.gen_range(0..2)))
72+
.chain(std::iter::repeat_n(empty_batch, rng.random_range(0..2)))
7373
})
7474
.collect()
7575
}

0 commit comments

Comments
 (0)