From 4c4a3e427bf93a0b12a5bb20dc04086982623f43 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 2 Oct 2025 07:02:34 -0400 Subject: [PATCH 1/4] [Variant] Improve documentation and make kernels consistent --- .../benches/variant_kernels.rs | 5 +++-- parquet-variant-compute/src/lib.rs | 17 +++++++++++------ parquet/src/variant.rs | 13 +++++-------- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/parquet-variant-compute/benches/variant_kernels.rs b/parquet-variant-compute/benches/variant_kernels.rs index 3cdb28229b8a..2fcf436354cf 100644 --- a/parquet-variant-compute/benches/variant_kernels.rs +++ b/parquet-variant-compute/benches/variant_kernels.rs @@ -19,8 +19,9 @@ use arrow::array::{Array, ArrayRef, StringArray}; use arrow::util::test_util::seedable_rng; use criterion::{criterion_group, criterion_main, Criterion}; use parquet_variant::{Variant, VariantBuilder}; -use parquet_variant_compute::variant_get::{variant_get, GetOptions}; -use parquet_variant_compute::{json_to_variant, VariantArray, VariantArrayBuilder}; +use parquet_variant_compute::{ + json_to_variant, variant_get, GetOptions, VariantArray, VariantArrayBuilder, +}; use rand::distr::Alphanumeric; use rand::rngs::StdRng; use rand::Rng; diff --git a/parquet-variant-compute/src/lib.rs b/parquet-variant-compute/src/lib.rs index 5575571589e3..8bb228517fd2 100644 --- a/parquet-variant-compute/src/lib.rs +++ b/parquet-variant-compute/src/lib.rs @@ -20,10 +20,14 @@ //! ## Main APIs //! - [`VariantArray`] : Represents an array of `Variant` values. //! - [`VariantArrayBuilder`]: For building [`VariantArray`] -//! - [`json_to_variant`]: Function to convert a batch of JSON strings to a `VariantArray`. -//! - [`variant_to_json`]: Function to convert a `VariantArray` to a batch of JSON strings. -//! - [`mod@cast_to_variant`]: Module to cast other Arrow arrays to `VariantArray`. -//! - [`variant_get`]: Module to get values from a `VariantArray` using a specified [`VariantPath`] +//! +//! 
# Compute Kernels +//! - [`json_to_variant()`]: Function to convert a Arrays of JSON strings to a `VariantArray`. +//! - [`variant_to_json()`]: Function to convert a `VariantArray` to Arrays of JSON strings. +//! - [`cast_to_variant()`]: Cast Arrow arrays to `VariantArray`. +//! - [`variant_get()`]: Convert `VariantArray` (or an inner path) to ArrowArrays type +//! - [`shred_variant()`]: Shred a `VariantArray` +//! - [`unshred_variant()`]: Unshred a `VariantArray`. //! //! ## 🚧 Work In Progress //! @@ -36,7 +40,7 @@ //! [Variant issue]: https://github.com/apache/arrow-rs/issues/6736 mod arrow_to_variant; -pub mod cast_to_variant; +mod cast_to_variant; mod from_json; mod shred_variant; mod to_json; @@ -44,7 +48,7 @@ mod type_conversion; mod unshred_variant; mod variant_array; mod variant_array_builder; -pub mod variant_get; +mod variant_get; mod variant_to_arrow; pub use variant_array::{BorrowedShreddingState, ShreddingState, VariantArray, VariantType}; @@ -56,3 +60,4 @@ pub use shred_variant::shred_variant; pub use to_json::variant_to_json; pub use type_conversion::CastOptions; pub use unshred_variant::unshred_variant; +pub use variant_get::{variant_get, GetOptions}; diff --git a/parquet/src/variant.rs b/parquet/src/variant.rs index 497d1dc6c4f3..b135f2bb7a59 100644 --- a/parquet/src/variant.rs +++ b/parquet/src/variant.rs @@ -22,14 +22,11 @@ //! Note: Requires the `variant_experimental` feature of the `parquet` crate to be enabled. //! //! # Features -//! * [`Variant`] represents variant value, which can be an object, list, or primitive. -//! * [`VariantBuilder`] for building `Variant` values. -//! * [`VariantArray`] for representing a column of Variant values. -//! * [`json_to_variant`] and [`variant_to_json`] for converting to/from JSON. -//! * [`cast_to_variant()`] for casting other Arrow arrays to `VariantArray`. -//! * [`VariantType`] Arrow ExtensionType for Parquet Variant logical type. -//! 
[`variant_get`] to extracting a value by path and functions to convert -//! between `Variant` and JSON. +//! * Representation of [`Variant`], and [`VariantArray`] for working with +//! Variant values (see [`parquet_variant`] for more details) +//! * Kernels for working with arrays of Variant values +//! such as conversion between `Variant` and JSON, and shredding/unshredding +//! (see [`parquet_variant_compute`] for more details) //! //! # Example: Writing a Parquet file with Variant column //! ```rust From 172cf4554586401cc1a5cccef2fb7acd1d55888a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 2 Oct 2025 07:43:34 -0400 Subject: [PATCH 2/4] fix --- parquet-variant-compute/src/cast_to_variant.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index 1f43144f789c..ba20a1d358da 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -34,7 +34,7 @@ use arrow_schema::ArrowError; /// ``` /// # use arrow::array::{Array, ArrayRef, Int64Array}; /// # use parquet_variant::Variant; -/// # use parquet_variant_compute::cast_to_variant::cast_to_variant; +/// # use parquet_variant_compute::cast_to_variant; /// // input is an Int64Array, which will be cast to a VariantArray /// let input = Int64Array::from(vec![Some(1), None, Some(3)]); /// let result = cast_to_variant(&input).unwrap(); From d6d3d1a5fb2ad2f97ff93808d8a341041644ba9a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 2 Oct 2025 09:20:29 -0400 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- parquet-variant-compute/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parquet-variant-compute/src/lib.rs b/parquet-variant-compute/src/lib.rs index 8bb228517fd2..78a07d24516e 100644 --- a/parquet-variant-compute/src/lib.rs +++ 
b/parquet-variant-compute/src/lib.rs @@ -22,12 +22,12 @@ //! - [`VariantArrayBuilder`]: For building [`VariantArray`] //! //! # Compute Kernels -//! - [`json_to_variant()`]: Function to convert a Arrays of JSON strings to a `VariantArray`. -//! - [`variant_to_json()`]: Function to convert a `VariantArray` to Arrays of JSON strings. +//! - [`json_to_variant()`]: Function to convert Arrays of JSON strings to a `VariantArray`. +//! - [`variant_to_json()`]: Function to convert a `VariantArray` to arrays of JSON strings. //! - [`cast_to_variant()`]: Cast Arrow arrays to `VariantArray`. //! - [`variant_get()`]: Convert `VariantArray` (or an inner path) to ArrowArrays type //! - [`shred_variant()`]: Shred a `VariantArray` -//! - [`unshred_variant()`]: Unshred a `VariantArray`. +//! - [`unshred_variant()`]: Unshred a `VariantArray`. //! //! ## 🚧 Work In Progress //! From 04ae1251e000eda8df5150f57a7e320157fcde1c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 3 Oct 2025 16:48:57 -0400 Subject: [PATCH 4/4] Apply suggestions from code review Co-authored-by: Ryan Johnson --- parquet-variant-compute/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parquet-variant-compute/src/lib.rs b/parquet-variant-compute/src/lib.rs index 9cfc647150bc..f529c27a8ffd 100644 --- a/parquet-variant-compute/src/lib.rs +++ b/parquet-variant-compute/src/lib.rs @@ -25,9 +25,9 @@ //! - [`json_to_variant()`]: Function to convert Arrays of JSON strings to a `VariantArray`. //! - [`variant_to_json()`]: Function to convert a `VariantArray` to arrays of JSON strings. //! - [`cast_to_variant()`]: Cast Arrow arrays to `VariantArray`. -//! - [`variant_get()`]: Convert `VariantArray` (or an inner path) to ArrowArrays type -//! - [`shred_variant()`]: Shred a `VariantArray` -//! - [`unshred_variant()`]: Unshred a `VariantArray`. +//! - [`variant_get()`]: Convert `VariantArray` (or an inner path) to a strongly-typed Arrow array. +//! 
- [`shred_variant()`]: Shred a `VariantArray` according to the provided shredding schema. +//! - [`unshred_variant()`]: Unshred a `VariantArray` to pure binary variant. //! //! ## 🚧 Work In Progress //!