From a18c82f9e20c7243e114cf0802464a2c077db7ca Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Fri, 15 Nov 2024 23:57:41 +0000 Subject: [PATCH 01/11] Fix md5 return_type to only return Utf8 as per current code impl. --- datafusion/functions/src/crypto/md5.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions/src/crypto/md5.rs b/datafusion/functions/src/crypto/md5.rs index 0f18fd47b4cf..0e8ff1cd3192 100644 --- a/datafusion/functions/src/crypto/md5.rs +++ b/datafusion/functions/src/crypto/md5.rs @@ -64,11 +64,11 @@ impl ScalarUDFImpl for Md5Func { fn return_type(&self, arg_types: &[DataType]) -> Result { use DataType::*; Ok(match &arg_types[0] { - LargeUtf8 | LargeBinary => LargeUtf8, + LargeUtf8 | LargeBinary => Utf8, Utf8View | Utf8 | Binary => Utf8, Null => Null, Dictionary(_, t) => match **t { - LargeUtf8 | LargeBinary => LargeUtf8, + LargeUtf8 | LargeBinary => Utf8, Utf8 | Binary => Utf8, Null => Null, _ => { From b6a95d9422f330999113e3a82eb68884b7c69b18 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sat, 21 Dec 2024 19:17:02 +0000 Subject: [PATCH 02/11] ci improvements --- .github/actions/setup-builder/action.yaml | 2 + .../setup-macos-aarch64-builder/action.yaml | 6 +- .../actions/setup-macos-builder/action.yaml | 4 +- .../actions/setup-rust-runtime/action.yaml | 1 + .../actions/setup-windows-builder/action.yaml | 4 +- .github/workflows/dependencies.yml | 1 + .github/workflows/docs_pr.yaml | 1 + .github/workflows/rust.yml | 234 +++++++++++------- Cargo.toml | 11 + datafusion-cli/Cargo.toml | 11 + 10 files changed, 184 insertions(+), 91 deletions(-) diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml index 22d2f2187dd0..da487788ca55 100644 --- a/.github/actions/setup-builder/action.yaml +++ b/.github/actions/setup-builder/action.yaml @@ -42,6 +42,8 @@ runs: "${RETRY[@]}" rustup component add rustfmt - name: Configure rust runtime env uses: 
./.github/actions/setup-rust-runtime + - name: Setup Rust cache + uses: Swatinem/rust-cache@v2 - name: Fixup git permissions # https://github.com/actions/checkout/issues/766 shell: bash diff --git a/.github/actions/setup-macos-aarch64-builder/action.yaml b/.github/actions/setup-macos-aarch64-builder/action.yaml index c4e14906ed10..288799a284b0 100644 --- a/.github/actions/setup-macos-aarch64-builder/action.yaml +++ b/.github/actions/setup-macos-aarch64-builder/action.yaml @@ -30,8 +30,8 @@ runs: run: | mkdir -p $HOME/d/protoc cd $HOME/d/protoc - export PROTO_ZIP="protoc-21.4-osx-aarch_64.zip" - curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP + export PROTO_ZIP="protoc-29.1-osx-aarch_64.zip" + curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v29.1/$PROTO_ZIP unzip $PROTO_ZIP echo "$HOME/d/protoc/bin" >> $GITHUB_PATH export PATH=$PATH:$HOME/d/protoc/bin @@ -43,5 +43,7 @@ runs: rustup toolchain install stable rustup default stable rustup component add rustfmt + - name: Setup rust cache + uses: Swatinem/rust-cache@v2 - name: Configure rust runtime env uses: ./.github/actions/setup-rust-runtime diff --git a/.github/actions/setup-macos-builder/action.yaml b/.github/actions/setup-macos-builder/action.yaml index 02419f617942..fffdab160b04 100644 --- a/.github/actions/setup-macos-builder/action.yaml +++ b/.github/actions/setup-macos-builder/action.yaml @@ -30,8 +30,8 @@ runs: run: | mkdir -p $HOME/d/protoc cd $HOME/d/protoc - export PROTO_ZIP="protoc-21.4-osx-x86_64.zip" - curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP + export PROTO_ZIP="protoc-29.1-osx-x86_64.zip" + curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v29.1/$PROTO_ZIP unzip $PROTO_ZIP echo "$HOME/d/protoc/bin" >> $GITHUB_PATH export PATH=$PATH:$HOME/d/protoc/bin diff --git a/.github/actions/setup-rust-runtime/action.yaml b/.github/actions/setup-rust-runtime/action.yaml index 
cd18be989031..709052157f6d 100644 --- a/.github/actions/setup-rust-runtime/action.yaml +++ b/.github/actions/setup-rust-runtime/action.yaml @@ -34,5 +34,6 @@ runs: echo "RUSTC_WRAPPER=sccache" >> $GITHUB_ENV echo "SCCACHE_GHA_ENABLED=true" >> $GITHUB_ENV echo "RUST_BACKTRACE=1" >> $GITHUB_ENV + echo "CARGO_INCREMENTAL=false" >> $GITHUB_ENV echo "RUSTFLAGS=-C debuginfo=line-tables-only -C incremental=false" >> $GITHUB_ENV diff --git a/.github/actions/setup-windows-builder/action.yaml b/.github/actions/setup-windows-builder/action.yaml index 5e937358c7d7..a0304168c744 100644 --- a/.github/actions/setup-windows-builder/action.yaml +++ b/.github/actions/setup-windows-builder/action.yaml @@ -30,8 +30,8 @@ runs: run: | mkdir -p $HOME/d/protoc cd $HOME/d/protoc - export PROTO_ZIP="protoc-21.4-win64.zip" - curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP + export PROTO_ZIP="protoc-29.1-win64.zip" + curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v29.1/$PROTO_ZIP unzip $PROTO_ZIP export PATH=$PATH:$HOME/d/protoc/bin protoc.exe --version diff --git a/.github/workflows/dependencies.yml b/.github/workflows/dependencies.yml index ebc5bcf91c94..f87215565bb5 100644 --- a/.github/workflows/dependencies.yml +++ b/.github/workflows/dependencies.yml @@ -42,6 +42,7 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: diff --git a/.github/workflows/docs_pr.yaml b/.github/workflows/docs_pr.yaml index c2f3dd684a23..3fad08643aa2 100644 --- a/.github/workflows/docs_pr.yaml +++ b/.github/workflows/docs_pr.yaml @@ -43,6 +43,7 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 71ff44e43d21..5eb02b6acfa8 100644 --- a/.github/workflows/rust.yml +++ 
b/.github/workflows/rust.yml @@ -47,9 +47,9 @@ jobs: - uses: actions/checkout@v4 - uses: korandoru/hawkeye@v5 - # Check crate compiles + # Check crate compiles and base cargo check passes linux-build-lib: - name: cargo check + name: linux build test runs-on: ubuntu-latest container: image: amd64/rust @@ -59,92 +59,119 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: stable + - name: Prepare cargo build + run: cargo check --profile ci --all-targets - - name: Cache Cargo - uses: actions/cache@v4 + # cargo check common, functions and substrait with no default features + linux-cargo-check-no-default-features: + name: cargo check no default features + needs: linux-build-lib + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v4 + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - ./target/ - ./datafusion-cli/target/ - key: cargo-cache-${{ hashFiles('**/Cargo.toml', '**/Cargo.lock') }} - + rust-version: stable - name: Check datafusion without default features # Some of the test binaries require the parquet feature still #run: cargo check --all-targets --no-default-features -p datafusion - run: cargo check --no-default-features -p datafusion + run: cargo check --profile ci --no-default-features -p datafusion - name: Check datafusion-common without default features - run: cargo check --all-targets --no-default-features -p datafusion-common + run: cargo check --profile ci --all-targets --no-default-features -p datafusion-common - name: Check datafusion-functions without default features - run: cargo check --all-targets --no-default-features -p datafusion-functions + run: cargo check --profile ci --all-targets --no-default-features -p datafusion-functions - name: Check datafusion-substrait without default features - run: cargo check --all-targets --no-default-features -p datafusion-substrait + 
run: cargo check --profile ci --all-targets --no-default-features -p datafusion-substrait - name: Check workspace in debug mode - run: cargo check --all-targets --workspace + run: cargo check --profile ci --all-targets --workspace - name: Check workspace with avro,json features - run: cargo check --workspace --benches --features avro,json + run: cargo check --profile ci --workspace --benches --features avro,json - name: Check Cargo.lock for datafusion-cli run: | # If this test fails, try running `cargo update` in the `datafusion-cli` directory # and check in the updated Cargo.lock file. - cargo check --manifest-path datafusion-cli/Cargo.toml --locked + cargo check --profile ci --manifest-path datafusion-cli/Cargo.toml --locked - # Ensure that the datafusion crate can be built with only a subset of the function - # packages enabled. + # cargo check datafusion to ensure that the datafusion crate can be built with only a + # subset of the function packages enabled. + linux-cargo-check-datafusion: + name: cargo check datafusion + needs: linux-build-lib + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v4 + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: stable - name: Check datafusion (nested_expressions) - run: cargo check --no-default-features --features=nested_expressions -p datafusion + run: cargo check --profile ci --no-default-features --features=nested_expressions -p datafusion - name: Check datafusion (crypto) - run: cargo check --no-default-features --features=crypto_expressions -p datafusion + run: cargo check --profile ci --no-default-features --features=crypto_expressions -p datafusion - name: Check datafusion (datetime_expressions) - run: cargo check --no-default-features --features=datetime_expressions -p datafusion + run: cargo check --profile ci --no-default-features --features=datetime_expressions -p datafusion - name: Check datafusion (encoding_expressions) - run: 
cargo check --no-default-features --features=encoding_expressions -p datafusion + run: cargo check --profile ci --no-default-features --features=encoding_expressions -p datafusion - name: Check datafusion (math_expressions) - run: cargo check --no-default-features --features=math_expressions -p datafusion + run: cargo check --profile ci --no-default-features --features=math_expressions -p datafusion - name: Check datafusion (regex_expressions) - run: cargo check --no-default-features --features=regex_expressions -p datafusion + run: cargo check --profile ci --no-default-features --features=regex_expressions -p datafusion - name: Check datafusion (string_expressions) - run: cargo check --no-default-features --features=string_expressions -p datafusion + run: cargo check --profile ci --no-default-features --features=string_expressions -p datafusion - # Ensure that the datafusion-functions crate can be built with only a subset of the function - # packages enabled. + # cargo check datafusion-functions to ensure that the datafusion-functions crate can be built with + # only a subset of the function packages enabled. 
+ linux-cargo-check-datafusion-functions: + name: cargo check functions + needs: linux-build-lib + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v4 + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: stable - name: Check datafusion-functions (crypto) - run: cargo check --all-targets --no-default-features --features=crypto_expressions -p datafusion-functions + run: cargo check --profile ci --all-targets --no-default-features --features=crypto_expressions -p datafusion-functions - name: Check datafusion-functions (datetime_expressions) - run: cargo check --all-targets --no-default-features --features=datetime_expressions -p datafusion-functions + run: cargo check --profile ci --all-targets --no-default-features --features=datetime_expressions -p datafusion-functions - name: Check datafusion-functions (encoding_expressions) - run: cargo check --all-targets --no-default-features --features=encoding_expressions -p datafusion-functions + run: cargo check --profile ci --all-targets --no-default-features --features=encoding_expressions -p datafusion-functions - name: Check datafusion-functions (math_expressions) - run: cargo check --all-targets --no-default-features --features=math_expressions -p datafusion-functions + run: cargo check --profile ci --all-targets --no-default-features --features=math_expressions -p datafusion-functions - name: Check datafusion-functions (regex_expressions) - run: cargo check --all-targets --no-default-features --features=regex_expressions -p datafusion-functions + run: cargo check --profile ci --all-targets --no-default-features --features=regex_expressions -p datafusion-functions - name: Check datafusion-functions (string_expressions) - run: cargo check --all-targets --no-default-features --features=string_expressions -p datafusion-functions + run: cargo check --profile ci --all-targets --no-default-features --features=string_expressions -p 
datafusion-functions # Run tests linux-test: name: cargo test (amd64) - needs: [ linux-build-lib ] + needs: linux-build-lib runs-on: ubuntu-latest container: image: amd64/rust @@ -152,18 +179,23 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: - rust-version: stable + rust-version: stable + - name: Install nextest + uses: taiki-e/install-action@nextest - name: Run tests (excluding doctests) - run: cargo test --lib --tests --bins --features avro,json,backtrace + run: cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --workspace --lib --tests --bins --features avro,json,backtrace + - name: Run sqllogictests + run: cargo test --profile ci --package datafusion-sqllogictest --test sqllogictests - name: Verify Working Directory Clean run: git diff --exit-code linux-test-datafusion-cli: name: cargo test datafusion-cli (amd64) - needs: [ linux-build-lib ] + needs: linux-build-lib runs-on: ubuntu-latest container: image: amd64/rust @@ -171,20 +203,23 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: stable + - name: Install nextest + uses: taiki-e/install-action@nextest - name: Run tests (excluding doctests) run: | cd datafusion-cli - cargo test --lib --tests --bins --all-features + cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --lib --tests --bins --all-features - name: Verify Working Directory Clean run: git diff --exit-code linux-test-example: name: cargo examples (amd64) - needs: [ linux-build-lib ] + needs: linux-build-lib runs-on: ubuntu-latest container: image: amd64/rust @@ -192,6 +227,7 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - name: Setup Rust toolchain uses: 
./.github/actions/setup-builder with: @@ -199,18 +235,16 @@ jobs: - name: Run examples run: | # test datafusion-sql examples - cargo run --example sql + cargo run --profile ci --example sql # test datafusion-examples ci/scripts/rust_example.sh - name: Verify Working Directory Clean run: git diff --exit-code - - # Run `cargo test doc` (test documentation examples) linux-test-doc: name: cargo test doc (amd64) - needs: [ linux-build-lib ] + needs: linux-build-lib runs-on: ubuntu-latest container: image: amd64/rust @@ -218,22 +252,23 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: stable - name: Run doctests run: | - cargo test --doc --features avro,json + cargo test --profile ci --doc --features avro,json cd datafusion-cli - cargo test --doc --all-features + cargo test --profile ci --doc --all-features - name: Verify Working Directory Clean run: git diff --exit-code # Run `cargo doc` to ensure the rustdoc is clean linux-rustdoc: name: cargo doc - needs: [ linux-build-lib ] + needs: linux-build-lib runs-on: ubuntu-latest container: image: amd64/rust @@ -266,7 +301,7 @@ jobs: # verify that the benchmark queries return the correct results verify-benchmark-results: name: verify benchmark results (amd64) - needs: [ linux-build-lib ] + needs: linux-build-lib runs-on: ubuntu-latest container: image: amd64/rust @@ -274,6 +309,7 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: @@ -288,17 +324,20 @@ jobs: mv *.tbl ../datafusion/sqllogictest/test_files/tpch/data - name: Verify that benchmark queries return expected results run: | + # increase stack size to fix stack overflow + export RUST_MIN_STACK=20971520 export TPCH_DATA=`realpath datafusion/sqllogictest/test_files/tpch/data` - # use release build for plan verificaton because debug build causes stack overflow - 
cargo test plan_q --package datafusion-benchmarks --profile release-nonlto --features=ci -- --test-threads=1 - INCLUDE_TPCH=true cargo test --test sqllogictests + cargo test plan_q --package datafusion-benchmarks --profile ci --features=ci -- --test-threads=1 + INCLUDE_TPCH=true cargo test --profile ci --package datafusion-sqllogictest --test sqllogictests - name: Verify Working Directory Clean run: git diff --exit-code sqllogictest-postgres: name: "Run sqllogictest with Postgres runner" - needs: [ linux-build-lib ] + needs: linux-build-lib runs-on: ubuntu-latest + container: + image: amd64/rust services: postgres: image: postgres:15 @@ -307,7 +346,7 @@ jobs: POSTGRES_DB: db_test POSTGRES_INITDB_ARGS: --encoding=UTF-8 --lc-collate=C --lc-ctype=C ports: - - 5432/tcp + - 5432:5432 options: >- --health-cmd pg_isready --health-interval 10s @@ -317,13 +356,18 @@ jobs: - uses: actions/checkout@v4 with: submodules: true - - name: Setup toolchain - run: | - rustup toolchain install stable - rustup default stable + fetch-depth: 1 + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: stable - name: Run sqllogictest - run: PG_COMPAT=true PG_URI="postgresql://postgres:postgres@localhost:$POSTGRES_PORT/db_test" cargo test --features=postgres --test sqllogictests + run: | + cd datafusion/sqllogictest + PG_COMPAT=true PG_URI="postgresql://postgres:postgres@$POSTGRES_HOST:$POSTGRES_PORT/db_test" cargo test --profile ci --features=postgres --test sqllogictests env: + # use postgres for the host here because we have specified a container for the job + POSTGRES_HOST: postgres POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }} # Temporarily commenting out the Windows flow, the reason is enormously slow running build @@ -347,21 +391,26 @@ jobs: # cd datafusion-cli # cargo test --lib --tests --bins --all-features - macos: - name: cargo test (macos) - runs-on: macos-latest - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - 
name: Setup Rust toolchain - uses: ./.github/actions/setup-macos-builder - - name: Run tests (excluding doctests) - shell: bash - run: | - cargo test --lib --tests --bins --features avro,json,backtrace - cd datafusion-cli - cargo test --lib --tests --bins --all-features +# Commenting out intel mac build as so few users would ever use it +# macos: +# name: cargo test (macos) +# runs-on: macos-latest +# steps: +# - uses: actions/checkout@v4 +# with: +# submodules: true +# fetch-depth: 1 +# - name: Setup Rust toolchain +# uses: ./.github/actions/setup-macos-builder +# - name: Install nextest +# uses: taiki-e/install-action@nextest +# - name: Run tests (excluding doctests) +# shell: bash +# run: | +# cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --workspace --lib --tests --bins --features avro,json,backtrace +# cargo test --profile ci --package datafusion-sqllogictest --test sqllogictests +# cd datafusion-cli +# cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --lib --tests --bins --all-features macos-aarch64: name: cargo test (macos-aarch64) @@ -370,18 +419,22 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - name: Setup Rust toolchain uses: ./.github/actions/setup-macos-aarch64-builder + - name: Install nextest + uses: taiki-e/install-action@nextest - name: Run tests (excluding doctests) shell: bash run: | - cargo test --lib --tests --bins --features avro,json,backtrace + cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --workspace --lib --tests --bins --features avro,json,backtrace + cargo test --profile ci --package datafusion-sqllogictest --test sqllogictests cd datafusion-cli - cargo test --lib --tests --bins --all-features + cargo nextest run --hide-progress-bar 
--no-fail-fast --cargo-profile ci --lib --tests --bins --all-features test-datafusion-pyarrow: name: cargo test pyarrow (amd64) - needs: [ linux-build-lib ] + needs: linux-build-lib runs-on: ubuntu-20.04 container: image: amd64/rust:bullseye # Workaround https://github.com/actions/setup-python/issues/721 @@ -389,6 +442,7 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - uses: actions/setup-python@v5 with: python-version: "3.8" @@ -400,8 +454,10 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: stable + - name: Install nextest + uses: taiki-e/install-action@nextest - name: Run datafusion-common tests - run: cargo test -p datafusion-common --features=pyarrow + run: cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci -p datafusion-common --features=pyarrow vendor: name: Verify Vendored Code @@ -412,6 +468,8 @@ jobs: - uses: actions/checkout@v4 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder + with: + rust-version: stable - name: Run gen run: ./regen.sh working-directory: ./datafusion/proto @@ -478,7 +536,7 @@ jobs: clippy: name: clippy - needs: [ linux-build-lib ] + needs: linux-build-lib runs-on: ubuntu-latest container: image: amd64/rust @@ -486,6 +544,7 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: @@ -498,7 +557,7 @@ jobs: # Check answers are correct when hash values collide hash-collisions: name: cargo test hash collisions (amd64) - needs: [ linux-build-lib ] + needs: linux-build-lib runs-on: ubuntu-latest container: image: amd64/rust @@ -506,18 +565,21 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: stable + - name: Install nextest + uses: taiki-e/install-action@nextest - name: Run tests run: | cd datafusion - cargo test --lib --tests 
--features=force_hash_collisions,avro + cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --workspace --lib --tests --features=force_hash_collisions,avro cargo-toml-formatting-checks: name: check Cargo.toml formatting - needs: [ linux-build-lib ] + needs: linux-build-lib runs-on: ubuntu-latest container: image: amd64/rust @@ -525,6 +587,7 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: @@ -537,7 +600,7 @@ jobs: config-docs-check: name: check configs.md and ***_functions.md is up-to-date - needs: [ linux-build-lib ] + needs: linux-build-lib runs-on: ubuntu-latest container: image: amd64/rust @@ -545,6 +608,7 @@ jobs: - uses: actions/checkout@v4 with: submodules: true + fetch-depth: 1 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: diff --git a/Cargo.toml b/Cargo.toml index b7c8c09a8537..a470589fddcd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -170,6 +170,17 @@ overflow-checks = false panic = 'unwind' rpath = false +[profile.ci] +inherits = "dev" +incremental = false + +# ci turns off debug info, etc for dependencies to allow for smaller binaries making caching more effective +[profile.ci.package."*"] +debug = false +debug-assertions = false +strip = "debuginfo" +incremental = false + [workspace.lints.clippy] # Detects large stack-allocated futures that may cause stack overflow crashes (see threshold in clippy.toml) large_futures = "warn" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 4cdc2120a029..f2059275aa1c 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -69,3 +69,14 @@ assert_cmd = "2.0" ctor = "0.2.0" predicates = "3.0" rstest = "0.22" + +[profile.ci] +inherits = "dev" +incremental = false + +# ci turns off debug info, etc for dependencies to allow for smaller binaries 
making caching more effective +[profile.ci.package."*"] +debug = false +debug-assertions = false +strip = "debuginfo" +incremental = false From 9b8b8f6cedcc44e5e7a623d9024df28d8f3ca928 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sat, 21 Dec 2024 23:07:29 +0000 Subject: [PATCH 03/11] Lock taiki-e/install-action to a githash for apache action policy - Release 2.46.19 in the case of this hash. --- .github/workflows/rust.yml | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 5eb02b6acfa8..e6fc184127a6 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -185,7 +185,9 @@ jobs: with: rust-version: stable - name: Install nextest - uses: taiki-e/install-action@nextest + uses: taiki-e/install-action@e37f44cf370f5ab2495787a3f6f30f092bc9a53e + with: + tool: cargo-nextest - name: Run tests (excluding doctests) run: cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --workspace --lib --tests --bins --features avro,json,backtrace - name: Run sqllogictests @@ -209,7 +211,9 @@ jobs: with: rust-version: stable - name: Install nextest - uses: taiki-e/install-action@nextest + uses: taiki-e/install-action@e37f44cf370f5ab2495787a3f6f30f092bc9a53e + with: + tool: cargo-nextest - name: Run tests (excluding doctests) run: | cd datafusion-cli @@ -403,7 +407,9 @@ jobs: # - name: Setup Rust toolchain # uses: ./.github/actions/setup-macos-builder # - name: Install nextest -# uses: taiki-e/install-action@nextest +# uses: taiki-e/install-action@e37f44cf370f5ab2495787a3f6f30f092bc9a53e +# with: +# tool: cargo-nextest # - name: Run tests (excluding doctests) # shell: bash # run: | @@ -423,7 +429,9 @@ jobs: - name: Setup Rust toolchain uses: ./.github/actions/setup-macos-aarch64-builder - name: Install nextest - uses: taiki-e/install-action@nextest + 
uses: taiki-e/install-action@e37f44cf370f5ab2495787a3f6f30f092bc9a53e + with: + tool: cargo-nextest - name: Run tests (excluding doctests) shell: bash run: | @@ -455,7 +463,9 @@ jobs: with: rust-version: stable - name: Install nextest - uses: taiki-e/install-action@nextest + uses: taiki-e/install-action@e37f44cf370f5ab2495787a3f6f30f092bc9a53e + with: + tool: cargo-nextest - name: Run datafusion-common tests run: cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci -p datafusion-common --features=pyarrow @@ -571,7 +581,9 @@ jobs: with: rust-version: stable - name: Install nextest - uses: taiki-e/install-action@nextest + uses: taiki-e/install-action@e37f44cf370f5ab2495787a3f6f30f092bc9a53e + with: + tool: cargo-nextest - name: Run tests run: | cd datafusion From b8270484bb79f8368c2bf023d7717d0fabbe749e Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sat, 21 Dec 2024 23:09:01 +0000 Subject: [PATCH 04/11] Lock taiki-e/install-action to a githash for apache action policy - Release 2.46.19 in the case of this hash. 
--- .github/workflows/rust.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index e6fc184127a6..7f8bb0948017 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -185,7 +185,7 @@ jobs: with: rust-version: stable - name: Install nextest - uses: taiki-e/install-action@e37f44cf370f5ab2495787a3f6f30f092bc9a53e + uses: taiki-e/install-action@e37f44c with: tool: cargo-nextest - name: Run tests (excluding doctests) @@ -211,7 +211,7 @@ jobs: with: rust-version: stable - name: Install nextest - uses: taiki-e/install-action@e37f44cf370f5ab2495787a3f6f30f092bc9a53e + uses: taiki-e/install-action@e37f44c with: tool: cargo-nextest - name: Run tests (excluding doctests) @@ -407,7 +407,7 @@ jobs: # - name: Setup Rust toolchain # uses: ./.github/actions/setup-macos-builder # - name: Install nextest -# uses: taiki-e/install-action@e37f44cf370f5ab2495787a3f6f30f092bc9a53e +# uses: taiki-e/install-action@e37f44c # with: # tool: cargo-nextest # - name: Run tests (excluding doctests) @@ -429,7 +429,7 @@ jobs: - name: Setup Rust toolchain uses: ./.github/actions/setup-macos-aarch64-builder - name: Install nextest - uses: taiki-e/install-action@e37f44cf370f5ab2495787a3f6f30f092bc9a53e + uses: taiki-e/install-action@e37f44c with: tool: cargo-nextest - name: Run tests (excluding doctests) @@ -463,7 +463,7 @@ jobs: with: rust-version: stable - name: Install nextest - uses: taiki-e/install-action@e37f44cf370f5ab2495787a3f6f30f092bc9a53e + uses: taiki-e/install-action@e37f44c with: tool: cargo-nextest - name: Run datafusion-common tests @@ -581,7 +581,7 @@ jobs: with: rust-version: stable - name: Install nextest - uses: taiki-e/install-action@e37f44cf370f5ab2495787a3f6f30f092bc9a53e + uses: taiki-e/install-action@e37f44c with: tool: cargo-nextest - name: Run tests From d47d535fa246d323d551d0e4068256e0e328e0b2 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sat, 21 Dec 2024 
23:55:39 +0000 Subject: [PATCH 05/11] Revert nextest change until action is approved. --- .github/workflows/rust.yml | 44 +++++++------------------------------- 1 file changed, 8 insertions(+), 36 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 7f8bb0948017..db39371a2c9a 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -184,14 +184,8 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: stable - - name: Install nextest - uses: taiki-e/install-action@e37f44c - with: - tool: cargo-nextest - name: Run tests (excluding doctests) - run: cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --workspace --lib --tests --bins --features avro,json,backtrace - - name: Run sqllogictests - run: cargo test --profile ci --package datafusion-sqllogictest --test sqllogictests + run: cargo test --profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --lib --tests --bins --features avro,json,backtrace - name: Verify Working Directory Clean run: git diff --exit-code @@ -210,14 +204,10 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: stable - - name: Install nextest - uses: taiki-e/install-action@e37f44c - with: - tool: cargo-nextest - name: Run tests (excluding doctests) run: | cd datafusion-cli - cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --lib --tests --bins --all-features + cargo test --profile ci --lib --tests --bins --all-features - name: Verify Working Directory Clean run: git diff --exit-code @@ -406,17 +396,12 @@ jobs: # fetch-depth: 1 # - name: Setup Rust toolchain # uses: ./.github/actions/setup-macos-builder -# - name: Install nextest -# uses: taiki-e/install-action@e37f44c -# with: -# tool: cargo-nextest # - name: Run tests (excluding doctests) # shell: bash # run: | -# cargo nextest run --hide-progress-bar 
--no-fail-fast --cargo-profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --workspace --lib --tests --bins --features avro,json,backtrace -# cargo test --profile ci --package datafusion-sqllogictest --test sqllogictests +# cargo test --profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --workspace --lib --tests --bins --features avro,json,backtrace # cd datafusion-cli -# cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --lib --tests --bins --all-features +# cargo test --profile ci --lib --tests --bins --all-features macos-aarch64: name: cargo test (macos-aarch64) @@ -428,17 +413,12 @@ jobs: fetch-depth: 1 - name: Setup Rust toolchain uses: ./.github/actions/setup-macos-aarch64-builder - - name: Install nextest - uses: taiki-e/install-action@e37f44c - with: - tool: cargo-nextest - name: Run tests (excluding doctests) shell: bash run: | - cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --workspace --lib --tests --bins --features avro,json,backtrace - cargo test --profile ci --package datafusion-sqllogictest --test sqllogictests + cargo test --profile ci --lib --tests --bins --features avro,json,backtrace cd datafusion-cli - cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --lib --tests --bins --all-features + cargo test --profile ci --lib --tests --bins --all-features test-datafusion-pyarrow: name: cargo test pyarrow (amd64) @@ -462,12 +442,8 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: stable - - name: Install nextest - uses: taiki-e/install-action@e37f44c - with: - tool: cargo-nextest - name: Run datafusion-common tests - run: cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci -p datafusion-common --features=pyarrow + run: cargo test --profile ci -p datafusion-common
--features=pyarrow vendor: name: Verify Vendored Code @@ -580,14 +556,10 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: stable - - name: Install nextest - uses: taiki-e/install-action@e37f44c - with: - tool: cargo-nextest - name: Run tests run: | cd datafusion - cargo nextest run --hide-progress-bar --no-fail-fast --cargo-profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --workspace --lib --tests --features=force_hash_collisions,avro + cargo test --profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --workspace --lib --tests --features=force_hash_collisions,avro cargo-toml-formatting-checks: name: check Cargo.toml formatting From be7e273c4dafed5eb7e610280940ea9d927feede Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sun, 22 Dec 2024 00:02:32 +0000 Subject: [PATCH 06/11] Exclude requires workspace --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index db39371a2c9a..97212a180196 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -185,7 +185,7 @@ jobs: with: rust-version: stable - name: Run tests (excluding doctests) - run: cargo test --profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --lib --tests --bins --features avro,json,backtrace + run: cargo test --profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --workspace --lib --tests --bins --features avro,json,backtrace - name: Verify Working Directory Clean run: git diff --exit-code From 62230f801b2d068b3b062b9387ad352ae53db606 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sun, 22 Dec 2024 00:24:50 +0000 Subject: [PATCH 07/11] Fixing minor typo to verify ci caching of builds is working as expected. 
--- .../sqllogictest/src/engines/datafusion_engine/normalize.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs index b80f0ef075ff..ced497de22a7 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs @@ -28,7 +28,7 @@ use std::sync::LazyLock; use super::super::conversion::*; use super::error::{DFSqlLogicTestError, Result}; -/// Converts `batches` to a result as expected by sqllogicteset. +/// Converts `batches` to a result as expected by sqllogictest. pub(crate) fn convert_batches(batches: Vec) -> Result>> { if batches.is_empty() { Ok(vec![]) From 03509ba4b803feb5a4a161250672e0cdcd3994bc Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Tue, 24 Dec 2024 15:46:58 +0000 Subject: [PATCH 08/11] Updates from PR review. --- .github/actions/setup-builder/action.yaml | 2 -- .github/actions/setup-rust-runtime/action.yaml | 1 - 2 files changed, 3 deletions(-) diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml index da487788ca55..22d2f2187dd0 100644 --- a/.github/actions/setup-builder/action.yaml +++ b/.github/actions/setup-builder/action.yaml @@ -42,8 +42,6 @@ runs: "${RETRY[@]}" rustup component add rustfmt - name: Configure rust runtime env uses: ./.github/actions/setup-rust-runtime - - name: Setup Rust cache - uses: Swatinem/rust-cache@v2 - name: Fixup git permissions # https://github.com/actions/checkout/issues/766 shell: bash diff --git a/.github/actions/setup-rust-runtime/action.yaml b/.github/actions/setup-rust-runtime/action.yaml index 709052157f6d..cd18be989031 100644 --- a/.github/actions/setup-rust-runtime/action.yaml +++ b/.github/actions/setup-rust-runtime/action.yaml @@ -34,6 +34,5 @@ runs: echo "RUSTC_WRAPPER=sccache" >> $GITHUB_ENV echo "SCCACHE_GHA_ENABLED=true" 
>> $GITHUB_ENV echo "RUST_BACKTRACE=1" >> $GITHUB_ENV - echo "CARGO_INCREMENTAL=false" >> $GITHUB_ENV echo "RUSTFLAGS=-C debuginfo=line-tables-only -C incremental=false" >> $GITHUB_ENV From b9e8ecd5802cac7bf0ca9e1052db649fa7e7bf80 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Tue, 24 Dec 2024 15:48:43 +0000 Subject: [PATCH 09/11] Adding issue link for disabling intel mac build --- .github/workflows/rust.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 97212a180196..f5b6eece9dc8 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -386,6 +386,7 @@ jobs: # cargo test --lib --tests --bins --all-features # Commenting out intel mac build as so few users would ever use it +# Details: https://github.com/apache/datafusion/issues/13846 # macos: # name: cargo test (macos) # runs-on: macos-latest From 64c7d4b1e3c9eea4d009a4e518cc07d8d4af659e Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Wed, 25 Dec 2024 21:33:35 +0000 Subject: [PATCH 10/11] improve performance of running examples --- ci/scripts/rust_example.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ci/scripts/rust_example.sh b/ci/scripts/rust_example.sh index 1bb97c88106f..f125f89b139f 100755 --- a/ci/scripts/rust_example.sh +++ b/ci/scripts/rust_example.sh @@ -17,9 +17,14 @@ # specific language governing permissions and limitations # under the License. -set -ex +set -e + +export CARGO_PROFILE_CI_OPT_LEVEL="s" +export CARGO_PROFILE_CI_STRIP=true + cd datafusion-examples/examples/ -cargo check --examples +cargo check --profile ci --examples +cargo build --profile ci --examples files=$(ls .) for filename in $files @@ -27,7 +32,6 @@ do example_name=`basename $filename ".rs"` # Skip tests that rely on external storage and flight if [ ! 
-d $filename ]; then - cargo run --example $example_name - cargo clean -p datafusion-examples + cargo run --profile ci --example $example_name fi done From 8d21ec1641f4ec6646a9dbe2ae459f5fb8875a35 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Thu, 26 Dec 2024 01:26:56 +0000 Subject: [PATCH 11/11] remove cargo check --- ci/scripts/rust_example.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/scripts/rust_example.sh b/ci/scripts/rust_example.sh index f125f89b139f..c3efcf2cf2e9 100755 --- a/ci/scripts/rust_example.sh +++ b/ci/scripts/rust_example.sh @@ -23,7 +23,6 @@ export CARGO_PROFILE_CI_OPT_LEVEL="s" export CARGO_PROFILE_CI_STRIP=true cd datafusion-examples/examples/ -cargo check --profile ci --examples cargo build --profile ci --examples files=$(ls .)