Skip to content

Commit 2045495

Browse files
committed
Merge branch 'main' into bugfix/do-not-normalize-values
2 parents 0574ab8 + d3cfc45 commit 2045495

File tree

207 files changed

+3522
-2107
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

207 files changed

+3522
-2107
lines changed

.github/workflows/rust.yml

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,12 @@ jobs:
8080
- name: Check datafusion-common without default features
8181
run: cargo check --all-targets --no-default-features -p datafusion-common
8282

83-
- name: Check datafusion-functions
83+
- name: Check datafusion-functions without default features
8484
run: cargo check --all-targets --no-default-features -p datafusion-functions
8585

86+
- name: Check datafusion-substrait without default features
87+
run: cargo check --all-targets --no-default-features -p datafusion-substrait
88+
8689
- name: Check workspace in debug mode
8790
run: cargo check --all-targets --workspace
8891

@@ -582,9 +585,9 @@ jobs:
582585
#
583586
# To reproduce:
584587
# 1. Install the version of Rust that is failing. Example:
585-
# rustup install 1.79.0
588+
# rustup install 1.80.1
586589
# 2. Run the command that failed with that version. Example:
587-
# cargo +1.79.0 check -p datafusion
590+
# cargo +1.80.1 check -p datafusion
588591
#
589592
# To resolve, either:
590593
# 1. Change your code to use older Rust features,
@@ -603,4 +606,4 @@ jobs:
603606
run: cargo msrv --output-format json --log-target stdout verify
604607
- name: Check datafusion-cli
605608
working-directory: datafusion-cli
606-
run: cargo msrv --output-format json --log-target stdout verify
609+
run: cargo msrv --output-format json --log-target stdout verify

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ datafusion/sqllogictest/test_files/scratch*
6767
# temp file for core
6868
datafusion/core/*.parquet
6969

70+
# Generated core benchmark data
71+
datafusion/core/benches/data/*
72+
7073
# rat
7174
filtered_rat.txt
7275
rat.txt

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ members = [
3030
"datafusion/functions",
3131
"datafusion/functions-aggregate",
3232
"datafusion/functions-aggregate-common",
33+
"datafusion/functions-table",
3334
"datafusion/functions-nested",
3435
"datafusion/functions-window",
3536
"datafusion/functions-window-common",
@@ -64,7 +65,7 @@ homepage = "https://datafusion.apache.org"
6465
license = "Apache-2.0"
6566
readme = "README.md"
6667
repository = "https://github.com/apache/datafusion"
67-
rust-version = "1.79"
68+
rust-version = "1.80.1"
6869
version = "43.0.0"
6970

7071
[workspace.dependencies]
@@ -110,6 +111,7 @@ datafusion-functions = { path = "datafusion/functions", version = "43.0.0" }
110111
datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "43.0.0" }
111112
datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "43.0.0" }
112113
datafusion-functions-nested = { path = "datafusion/functions-nested", version = "43.0.0" }
114+
datafusion-functions-table = { path = "datafusion/functions-table", version = "43.0.0" }
113115
datafusion-functions-window = { path = "datafusion/functions-window", version = "43.0.0" }
114116
datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "43.0.0" }
115117
datafusion-macros = { path = "datafusion/macros", version = "43.0.0" }

README.md

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,14 +126,17 @@ Optional features:
126126

127127
## Rust Version Compatibility Policy
128128

129-
DataFusion's Minimum Required Stable Rust Version (MSRV) policy is to support stable [4 latest
130-
Rust versions](https://releases.rs) OR the stable minor Rust version as of 4 months, whichever is lower.
129+
The Rust toolchain releases are tracked at [Rust Versions](https://releases.rs) and follow
130+
[semantic versioning](https://semver.org/). A Rust toolchain release can be identified
131+
by a version string like `1.80.0`, or more generally `major.minor.patch`.
132+
133+
DataFusion supports the last 4 stable Rust minor versions released and any such versions released within the last 4 months.
131134

132135
For example, given the releases `1.78.0`, `1.79.0`, `1.80.0`, `1.80.1` and `1.81.0`, DataFusion will support 1.78.0, which is 3 minor versions prior to the most recent minor `1.81`.
133136

134-
If a hotfix is released for the minimum supported Rust version (MSRV), the MSRV will be the minor version with all hotfixes, even if it surpasses the four-month window.
137+
Note: If a Rust hotfix is released for the current MSRV, the MSRV will be updated to the specific minor version that includes all applicable hotfixes; this rule takes precedence over the other policies.
135138

136-
We enforce this policy using a [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Fdatafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code)
139+
DataFusion enforces the MSRV policy using an [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Fdatafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code)
137140

138141
## DataFusion API evolution policy
139142

benchmarks/src/bin/external_aggr.rs

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
use std::collections::HashMap;
2121
use std::path::PathBuf;
2222
use std::sync::Arc;
23-
use std::sync::OnceLock;
23+
use std::sync::LazyLock;
2424
use structopt::StructOpt;
2525

2626
use arrow::record_batch::RecordBatch;
@@ -33,7 +33,8 @@ use datafusion::datasource::{MemTable, TableProvider};
3333
use datafusion::error::Result;
3434
use datafusion::execution::memory_pool::FairSpillPool;
3535
use datafusion::execution::memory_pool::{human_readable_size, units};
36-
use datafusion::execution::runtime_env::RuntimeConfig;
36+
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
37+
use datafusion::execution::SessionStateBuilder;
3738
use datafusion::physical_plan::display::DisplayableExecutionPlan;
3839
use datafusion::physical_plan::{collect, displayable};
3940
use datafusion::prelude::*;
@@ -90,7 +91,13 @@ struct QueryResult {
9091
/// Memory limits to run: 64MiB, 32MiB, 16MiB
9192
/// Q2 requires 250MiB for aggregation
9293
/// Memory limits to run: 512MiB, 256MiB, 128MiB, 64MiB, 32MiB
93-
static QUERY_MEMORY_LIMITS: OnceLock<HashMap<usize, Vec<u64>>> = OnceLock::new();
94+
static QUERY_MEMORY_LIMITS: LazyLock<HashMap<usize, Vec<u64>>> = LazyLock::new(|| {
95+
use units::*;
96+
let mut map = HashMap::new();
97+
map.insert(1, vec![64 * MB, 32 * MB, 16 * MB]);
98+
map.insert(2, vec![512 * MB, 256 * MB, 128 * MB, 64 * MB, 32 * MB]);
99+
map
100+
});
94101

95102
impl ExternalAggrConfig {
96103
const AGGR_TABLES: [&'static str; 1] = ["lineitem"];
@@ -113,16 +120,6 @@ impl ExternalAggrConfig {
113120
"#,
114121
];
115122

116-
fn init_query_memory_limits() -> &'static HashMap<usize, Vec<u64>> {
117-
use units::*;
118-
QUERY_MEMORY_LIMITS.get_or_init(|| {
119-
let mut map = HashMap::new();
120-
map.insert(1, vec![64 * MB, 32 * MB, 16 * MB]);
121-
map.insert(2, vec![512 * MB, 256 * MB, 128 * MB, 64 * MB, 32 * MB]);
122-
map
123-
})
124-
}
125-
126123
/// If `--query` and `--memory-limit` are not specified, run all queries
127124
/// with pre-configured memory limits
128125
/// If only `--query` is specified, run the query with all memory limits
@@ -160,8 +157,7 @@ impl ExternalAggrConfig {
160157
query_executions.push((query_id, limit));
161158
}
162159
None => {
163-
let memory_limits_table = Self::init_query_memory_limits();
164-
let memory_limits = memory_limits_table.get(&query_id).unwrap();
160+
let memory_limits = QUERY_MEMORY_LIMITS.get(&query_id).unwrap();
165161
for limit in memory_limits {
166162
query_executions.push((query_id, *limit));
167163
}
@@ -195,10 +191,15 @@ impl ExternalAggrConfig {
195191
let query_name =
196192
format!("Q{query_id}({})", human_readable_size(mem_limit as usize));
197193
let config = self.common.config();
198-
let runtime_config = RuntimeConfig::new()
194+
let runtime_env = RuntimeEnvBuilder::new()
199195
.with_memory_pool(Arc::new(FairSpillPool::new(mem_limit as usize)))
200196
.build_arc()?;
201-
let ctx = SessionContext::new_with_config_rt(config, runtime_config);
197+
let state = SessionStateBuilder::new()
198+
.with_config(config)
199+
.with_runtime_env(runtime_env)
200+
.with_default_features()
201+
.build();
202+
let ctx = SessionContext::from(state);
202203

203204
// register tables
204205
self.register_tables(&ctx).await?;

benchmarks/src/sort_tpch.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use datafusion::datasource::listing::{
3232
};
3333
use datafusion::datasource::{MemTable, TableProvider};
3434
use datafusion::error::Result;
35-
use datafusion::execution::runtime_env::RuntimeConfig;
35+
use datafusion::execution::SessionStateBuilder;
3636
use datafusion::physical_plan::display::DisplayableExecutionPlan;
3737
use datafusion::physical_plan::{displayable, execute_stream};
3838
use datafusion::prelude::*;
@@ -188,9 +188,11 @@ impl RunOpt {
188188
/// Benchmark query `query_id` in `SORT_QUERIES`
189189
async fn benchmark_query(&self, query_id: usize) -> Result<Vec<QueryResult>> {
190190
let config = self.common.config();
191-
192-
let runtime_config = RuntimeConfig::new().build_arc()?;
193-
let ctx = SessionContext::new_with_config_rt(config, runtime_config);
191+
let state = SessionStateBuilder::new()
192+
.with_config(config)
193+
.with_default_features()
194+
.build();
195+
let ctx = SessionContext::from(state);
194196

195197
// register tables
196198
self.register_tables(&ctx).await?;

0 commit comments

Comments
 (0)