Skip to content

Commit

Permalink
Merge remote-tracking branch 'datafusion-orc/split-datafusion-integra…
Browse files Browse the repository at this point in the history
…tion' into donation

Signed-off-by: Ruihang Xia <[email protected]>
  • Loading branch information
waynexia committed Oct 24, 2024
2 parents d7f41d9 + bc498c1 commit 6c63cae
Show file tree
Hide file tree
Showing 205 changed files with 14,054 additions and 1 deletion.
19 changes: 19 additions & 0 deletions .config/nextest.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Default nextest profile.
# A test is reported as slow once it has run for 60s; after 3 such periods it
# is terminated, with a 30s grace period before it is forcibly killed.
[profile.default]
slow-timeout = { period = "60s", terminate-after = 3, grace-period = "30s" }
199 changes: 199 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# CI workflow.
#
# Triggers: pull requests (opened, synchronize, reopened, ready_for_review),
# pushes to `develop` and `main`, and manual `workflow_dispatch` runs. For
# pull requests and pushes, changes confined to the `paths-ignore` patterns
# do not start a run.
on:
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    paths-ignore:
      - 'docs/**'
      - 'config/**'
      - '**.md'
      - '.dockerignore'
      - 'docker/**'
      - '.gitignore'
  push:
    branches:
      - develop
      - main
    paths-ignore:
      - 'docs/**'
      - 'config/**'
      - '**.md'
      - '.dockerignore'
      - 'docker/**'
      - '.gitignore'
  workflow_dispatch:

name: CI

env:
  # Toolchain channel passed to every `dtolnay/rust-toolchain` step below.
  RUST_TOOLCHAIN: stable

jobs:
  # Spell-checks the repository. Unlike the other jobs it has no draft-PR
  # guard, so it also runs on draft pull requests.
  typos:
    name: Spell Check with Typos
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # NOTE(review): the version pin of this action was mangled by e-mail
      # obfuscation in this copy of the file; restore the real
      # `crate-ci/typos@<version>` ref before use.
      - uses: crate-ci/[email protected]

  # Type-checks the whole workspace once per feature configuration:
  # default features, no default features, and all features.
  check:
    name: Check
    # Skip draft pull requests.
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 60
    strategy:
      matrix:
        features:
          - ''
          - '--no-default-features'
          - '--all-features'
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
      - name: Run cargo check
        run: cargo check --workspace --all-targets ${{ matrix.features }}

  # Builds and runs the `datafusion_integration` example with all features.
  examples:
    name: Examples
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
      - name: Run cargo examples
        run: cargo run --example datafusion_integration --all-features

  # Verifies TOML formatting with taplo (installed from source via cargo).
  toml:
    name: Toml Check
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
      - name: Install taplo
        run: cargo install taplo-cli --version ^0.8 --locked
      - name: Run taplo
        run: taplo format --check

  # Fails if any Rust source is not rustfmt-clean.
  fmt:
    name: Rustfmt
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
          components: rustfmt
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
      - name: Run cargo fmt
        run: cargo fmt --all -- --check

  # Lints the workspace with warnings denied, once per feature configuration.
  clippy:
    name: Clippy
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 60
    strategy:
      matrix:
        features:
          - ''
          - '--no-default-features'
          - '--all-features'
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
          components: clippy
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
      - name: Run cargo clippy
        run: cargo clippy --workspace --all-targets ${{ matrix.features }} -- -D warnings

  # Checks that source files carry the expected license header.
  license-header:
    name: Check license header
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Check license headers
        uses: korandoru/hawkeye@v5

  # Checks dependency licenses with cargo-deny.
  cargo-deny:
    name: Cargo Deny License Check
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: EmbarkStudios/cargo-deny-action@v1
        with:
          command: check license

  # Runs the test suite under cargo-llvm-cov + nextest and uploads the
  # resulting lcov report to Codecov. Only starts after `clippy` succeeds.
  coverage:
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 60
    needs: [clippy]
    steps:
      - uses: actions/checkout@v4
      - uses: KyleMayes/install-llvm-action@v1
        with:
          version: "14.0"
      - name: Install toolchain
        uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
          components: llvm-tools-preview
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
      - name: Install latest nextest release
        uses: taiki-e/install-action@nextest
      - name: Install cargo-llvm-cov
        uses: taiki-e/install-action@cargo-llvm-cov
      - name: Collect coverage data
        run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info --all-features
        env:
          # Link with lld (presumably provided by the LLVM install step above).
          CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
          RUST_BACKTRACE: 1
          CARGO_INCREMENTAL: 0
          UNITTEST_LOG_DIR: "__unittest_logs"
      - name: Codecov upload
        uses: codecov/codecov-action@v2
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./lcov.info
          flags: rust
          # An upload failure must not fail the CI run.
          fail_ci_if_error: false
          verbose: true
12 changes: 11 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,14 @@ Cargo.lock
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# JetBrains IDE project directory.
.idea/

# Presumably a local Python virtual environment and downloaded benchmark
# inputs (the latter only at the repository root) - confirm.
venv
/benchmark_data

# Local scratch material. NOTE(review): `*.txt` ignores every .txt file in
# the tree, not only those under `private/` - confirm this is intended.
private/
*.txt

# Presumably profiler output (perf data, flamegraphs) at the repository root.
/perf.*
/flamegraph.svg

107 changes: 107 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Crate metadata for `orc-rust`.
[package]
name = "orc-rust"
version = "0.4.1"
edition = "2021"
homepage = "https://github.com/datafusion-contrib/datafusion-orc"
repository = "https://github.com/datafusion-contrib/datafusion-orc"
authors = ["Weny <[email protected]>", "Jeffrey Vo <[email protected]>"]
license = "Apache-2.0"
description = "Implementation of Apache ORC file format using Apache Arrow in-memory format"
keywords = ["arrow", "orc", "arrow-rs", "datafusion"]
# Package only the Rust sources under `src/` and this manifest.
include = ["src/**/*.rs", "Cargo.toml"]
# Minimum supported Rust version.
rust-version = "1.73"

# Build the docs.rs documentation with every feature enabled.
[package.metadata.docs.rs]
all-features = true

# Runtime dependencies. Entries without `optional = true` are always built.
[dependencies]
arrow = { version = "52", features = ["prettyprint", "chrono-tz"] }
bytemuck = { version = "1.18.0", features = ["must_cast"] }
bytes = "1.4"
chrono = { version = "0.4.37", default-features = false, features = ["std"] }
chrono-tz = "0.9"
fallible-streaming-iterator = { version = "0.1" }
flate2 = "1"
lz4_flex = "0.11"
lzokay-native = "0.1"
num = "0.4.1"
prost = { version = "0.12" }
snafu = "0.8"
snap = "1.1"
zstd = "0.12"

# async support
# Optional; these four crates are enabled together by the `async` feature.
async-trait = { version = "0.1.77", optional = true }
futures = { version = "0.3", optional = true, default-features = false, features = ["std"] }
futures-util = { version = "0.3", optional = true }
tokio = { version = "1.28", optional = true, features = [
"io-util",
"sync",
"fs",
"macros",
"rt",
"rt-multi-thread",
] }

# cli
# Optional; enabled by the `cli` feature.
anyhow = { version = "1.0", optional = true }
clap = { version = "4.5.4", features = ["derive"], optional = true }

# opendal
# Optional; enabled by the `opendal` feature.
opendal = { version = "0.48", optional = true, default-features = false }

# Dependencies used only when building tests, examples, and benchmarks.
[dev-dependencies]
arrow-ipc = { version = "52.0.0", features = ["lz4"] }
arrow-json = "52.0.0"
criterion = { version = "0.5", default-features = false, features = ["async_tokio"] }
# Enabled unconditionally here (with the in-memory service), independent of
# the optional `opendal` feature of the library itself.
opendal = { version = "0.48", default-features = false, features = ["services-memory"] }
pretty_assertions = "1.3.0"
proptest = "1.0.0"
serde_json = { version = "1.0", default-features = false, features = ["std"] }

[features]
# Async support is enabled unless the user opts out with
# `default-features = false`.
default = ["async"]

# Enable the async API; pulls in the optional async crates.
async = ["async-trait", "futures", "futures-util", "tokio"]
# Enable the dependencies of the `orc-*` binaries, which all require this
# feature.
cli = ["anyhow", "clap"]
# Enable opendal support.
opendal = ["dep:opendal"]

# Benchmark target; only built when the `async` feature is enabled.
[[bench]]
name = "arrow_reader"
# Disable the built-in libtest harness; the benchmark provides its own entry
# point (presumably Criterion, which is listed in the dev-dependencies).
harness = false
required-features = ["async"]
# Some issue when publishing and path isn't specified, so adding here
path = "./benches/arrow_reader.rs"

# Keep debug info in benchmark builds.
[profile.bench]
debug = true

# Command-line tools. Each is built only when the `cli` feature is enabled.
[[bin]]
name = "orc-metadata"
required-features = ["cli"]

[[bin]]
name = "orc-export"
required-features = ["cli"]

[[bin]]
name = "orc-stats"
required-features = ["cli"]
17 changes: 17 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Developer convenience targets. The text after `##` on each target line is
# its one-line description. Recipe lines must begin with a TAB character.

.PHONY: fmt
fmt: ## Format all the Rust code.
	cargo fmt --all

.PHONY: clippy
clippy: ## Check clippy rules.
	cargo clippy --workspace --all-targets -- -D warnings

# NOTE(review): the `indent_string` value in the two taplo targets below may
# have had its whitespace collapsed in this copy of the file; confirm the
# intended indent width. The CI "Toml Check" job runs `taplo format --check`
# without this option.
.PHONY: fmt-toml
fmt-toml: ## Format all TOML files.
	taplo format --option "indent_string= "

.PHONY: check-toml
check-toml: ## Check all TOML files.
	taplo format --check --option "indent_string= "
Loading

0 comments on commit 6c63cae

Please sign in to comment.