diff --git a/Cargo.toml b/Cargo.toml index c17f4f7affaa..55855d09d50e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,21 +77,21 @@ version = "45.0.0" ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } -arrow = { version = "54.0.0", features = [ +arrow = { version = "54.1.0", features = [ "prettyprint", ] } -arrow-array = { version = "54.0.0", default-features = false, features = [ +arrow-array = { version = "54.1.0", default-features = false, features = [ "chrono-tz", ] } -arrow-buffer = { version = "54.0.0", default-features = false } -arrow-flight = { version = "54.0.0", features = [ +arrow-buffer = { version = "54.1.0", default-features = false } +arrow-flight = { version = "54.1.0", features = [ "flight-sql-experimental", ] } -arrow-ipc = { version = "54.0.0", default-features = false, features = [ +arrow-ipc = { version = "54.1.0", default-features = false, features = [ "lz4", ] } -arrow-ord = { version = "54.0.0", default-features = false } -arrow-schema = { version = "54.0.0", default-features = false } +arrow-ord = { version = "54.1.0", default-features = false } +arrow-schema = { version = "54.1.0", default-features = false } async-trait = "0.1.73" bigdecimal = "0.4.7" bytes = "1.4" @@ -133,7 +133,7 @@ itertools = "0.14" log = "^0.4" object_store = { version = "0.11.0", default-features = false } parking_lot = "0.12" -parquet = { version = "54.0.0", default-features = false, features = [ +parquet = { version = "54.1.0", default-features = false, features = [ "arrow", "async", "object_store", diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 4a4d0157c620..a5cf71426607 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -175,9 +175,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ccdcc8fb14508ca20aaec7076032e5c0b0751b906036d4496786e2f227a37a" +checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" dependencies = [ "arrow-arith", "arrow-array", @@ -196,9 +196,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1aad8e27f32e411a0fc0bf5a625a35f0bf9b9f871cf4542abe31f7cef4beea2" +checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" dependencies = [ "arrow-array", "arrow-buffer", @@ -210,9 +210,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd6ed90c28c6f73a706c55799b8cc3a094e89257238e5b1d65ca7c70bd3ae23f" +checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" dependencies = [ "ahash", "arrow-buffer", @@ -227,9 +227,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4a40bdc1552ea10fbdeae4e5a945d8572c32f66bce457b96c13d9c46b80447" +checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" dependencies = [ "bytes", "half", @@ -238,9 +238,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "430c0a21aa7f81bcf0f97c57216d7127795ea755f494d27bae2bd233be43c2cc" +checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" dependencies = [ "arrow-array", "arrow-buffer", @@ -259,9 +259,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4444c8f8c57ac00e6a679ede67d1ae8872c170797dc45b46f75702437a77888" +checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" dependencies = [ "arrow-array", "arrow-cast", @@ -275,9 +275,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09af476cfbe9879937e50b1334c73189de6039186e025b1b1ac84b283b87b20e" +checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" dependencies = [ "arrow-buffer", "arrow-schema", @@ -287,9 +287,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "136296e8824333a8a4c4a6e508e4aa65d5678b801246d0408825ae7b2523c628" +checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" dependencies = [ "arrow-array", "arrow-buffer", @@ -301,9 +301,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e222ad0e419ab8276818c5605a5bb1e35ed86fa8c5e550726433cc63b09c3c78" +checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" dependencies = [ "arrow-array", "arrow-buffer", @@ -321,9 +321,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eddf14c5f03b679ec8ceac4dfac43f63cdc4ed54dab3cc120a4ef46af38481eb" +checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" dependencies = [ "arrow-array", "arrow-buffer", @@ -334,9 +334,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9acdc58da19f383f4ba381fa0e3583534ae2ceb31269aaf4a03f08ff13e8443" +checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" dependencies = [ "arrow-array", "arrow-buffer", @@ -347,15 +347,15 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1822a1a952955637e85e8f9d6b0e04dd75d65492b87ec548dd593d3a1f772b" +checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" [[package]] name = "arrow-select" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c4172e9a12dfe15303d3926269f9ead471ea93bdd067d113abc65cb6c48e246" +checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" dependencies = [ "ahash", "arrow-array", @@ -367,9 +367,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73683040445f4932342781926189901c9521bb1a787c35dbe628a3ce51372d3c" +checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" dependencies = [ "arrow-array", "arrow-buffer", @@ -2901,9 +2901,9 @@ dependencies = [ [[package]] name = "parquet" -version = "54.0.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3334c50239d9f4951653d84fa6f636da86f53742e5e5849a30fbe852f3ff4383" +checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" dependencies = [ "ahash", "arrow-array", @@ -2927,6 +2927,7 @@ dependencies = [ "object_store", "paste", "seq-macro", + "simdutf8", "snap", "thrift", "tokio", @@ -3716,6 +3717,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "siphasher" version = "1.0.1" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 6b6b52fce743..7daa32562173 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -29,7 +29,7 @@ rust-version = "1.81.0" readme = "README.md" [dependencies] -arrow = { version = "54.0.0" } +arrow = { version = "54.1.0" } async-trait = "0.1.0" aws-config = "1.5.0" aws-credential-types = "1.2.0" @@ -57,7 +57,7 @@ home = "=0.5.11" mimalloc = { version = "0.1", default-features = false } object_store = { version = "0.11.0", features = ["aws", "gcp", "http"] } parking_lot = { version = "0.12" } -parquet = { version = "54.0.0", default-features = false } +parquet = { version = "54.1.0", default-features = false } regex = "1.8" rustyline = "15.0" tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] } diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index 54344d55bbd1..82735334c7f8 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -309,10 +309,8 @@ impl FileOpener for ArrowOpener { for (dict_block, dict_result) in footer.dictionaries().iter().flatten().zip(dict_results) { - decoder.read_dictionary( - dict_block, - &Buffer::from_bytes(dict_result.into()), - )?; + decoder + .read_dictionary(dict_block, &Buffer::from(dict_result))?; } // filter recordbatches according to range @@ -348,10 +346,7 @@ impl FileOpener for ArrowOpener { .zip(recordbatch_results) .filter_map(move |(block, data)| { decoder - .read_record_batch( - &block, - &Buffer::from_bytes(data.into()), - ) + .read_record_batch(&block, &Buffer::from(data)) .transpose() }), ) diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 1ebbf92c736e..eed11f634c9d 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -30,8 +30,8 @@ use arrow::{ record_batch::RecordBatch, }; use arrow_array::{ - Array, BooleanArray, DictionaryArray, Float32Array, Float64Array, Int8Array, - UnionArray, + record_batch, Array, BooleanArray, DictionaryArray, Float32Array, Float64Array, + Int8Array, UnionArray, }; use arrow_buffer::ScalarBuffer; use arrow_schema::{ArrowError, SchemaRef, UnionFields, UnionMode}; @@ -1121,6 +1121,39 @@ async fn join() -> Result<()> { Ok(()) } +#[tokio::test] +async fn join_coercion_unnnamed() -> Result<()> { + let ctx = SessionContext::new(); + + // Test that join will coerce column types when necessary + // even when the relations don't have unique names + let left = ctx.read_batch(record_batch!( + ("id", Int32, [1, 2, 3]), + ("name", Utf8, ["a", "b", "c"]) + )?)?; + let right = ctx.read_batch(record_batch!( + ("id", Int32, [10, 3]), + ("name", Utf8View, ["d", "c"]) // Utf8View is a different type + )?)?; + let cols = vec!["name", "id"]; + + let filter = None; + let join = right.join(left, JoinType::LeftAnti, &cols, &cols, filter)?; + let results = join.collect().await?; + + assert_batches_sorted_eq!( + [ + "+----+------+", + "| id | name |", + "+----+------+", + "| 10 | d |", + "+----+------+", + ], + &results + ); + Ok(()) +} + #[tokio::test] async fn join_on() -> Result<()> { let left = test_table_with_name("a") diff --git a/datafusion/expr-common/src/interval_arithmetic.rs b/datafusion/expr-common/src/interval_arithmetic.rs index 993051eaeee1..fc1955705ee0 100644 --- a/datafusion/expr-common/src/interval_arithmetic.rs +++ b/datafusion/expr-common/src/interval_arithmetic.rs @@ -26,6 +26,8 @@ use std::ops::{AddAssign, SubAssign}; use arrow::compute::{cast_with_options, CastOptions}; use arrow::datatypes::{ DataType, IntervalDayTime, IntervalMonthDayNano, IntervalUnit, TimeUnit, + MAX_DECIMAL128_FOR_EACH_PRECISION, MAX_DECIMAL256_FOR_EACH_PRECISION, + MIN_DECIMAL128_FOR_EACH_PRECISION, MIN_DECIMAL256_FOR_EACH_PRECISION, }; use datafusion_common::rounding::{alter_fp_rounding_mode, next_down, next_up}; use datafusion_common::{internal_err, Result, ScalarValue}; @@ -97,718 +99,6 @@ macro_rules! get_extreme_value { }; } -/// The maximum `i128` value that can be stored in a `Decimal128` value of precision `p`. -/// -/// Remove this once is available -const MAX_DECIMAL128_FOR_EACH_PRECISION: [i128; 39] = [ - 0, // unused first element - 9, - 99, - 999, - 9999, - 99999, - 999999, - 9999999, - 99999999, - 999999999, - 9999999999, - 99999999999, - 999999999999, - 9999999999999, - 99999999999999, - 999999999999999, - 9999999999999999, - 99999999999999999, - 999999999999999999, - 9999999999999999999, - 99999999999999999999, - 999999999999999999999, - 9999999999999999999999, - 99999999999999999999999, - 999999999999999999999999, - 9999999999999999999999999, - 99999999999999999999999999, - 999999999999999999999999999, - 9999999999999999999999999999, - 99999999999999999999999999999, - 999999999999999999999999999999, - 9999999999999999999999999999999, - 99999999999999999999999999999999, - 999999999999999999999999999999999, - 9999999999999999999999999999999999, - 99999999999999999999999999999999999, - 999999999999999999999999999999999999, - 9999999999999999999999999999999999999, - 99999999999999999999999999999999999999, -]; - -/// The minimum `i128` value that can be stored in a `Decimal128` value of precision `p`. -/// -/// Remove this once is available -const MIN_DECIMAL128_FOR_EACH_PRECISION: [i128; 39] = [ - 0, // unused first element - -9, - -99, - -999, - -9999, - -99999, - -999999, - -9999999, - -99999999, - -999999999, - -9999999999, - -99999999999, - -999999999999, - -9999999999999, - -99999999999999, - -999999999999999, - -9999999999999999, - -99999999999999999, - -999999999999999999, - -9999999999999999999, - -99999999999999999999, - -999999999999999999999, - -9999999999999999999999, - -99999999999999999999999, - -999999999999999999999999, - -9999999999999999999999999, - -99999999999999999999999999, - -999999999999999999999999999, - -9999999999999999999999999999, - -99999999999999999999999999999, - -999999999999999999999999999999, - -9999999999999999999999999999999, - -99999999999999999999999999999999, - -999999999999999999999999999999999, - -9999999999999999999999999999999999, - -99999999999999999999999999999999999, - -999999999999999999999999999999999999, - -9999999999999999999999999999999999999, - -99999999999999999999999999999999999999, -]; - -/// The maximum `i256` value that can be stored in a `Decimal256` value of precision `p`. -/// -/// Remove this once is available -const MAX_DECIMAL256_FOR_EACH_PRECISION: [arrow::datatypes::i256; 77] = [ - arrow::datatypes::i256::from_i128(0_i128), // unused first element - arrow::datatypes::i256::from_le_bytes([ - 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 231, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 15, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 159, 134, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 63, 66, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 127, 150, 152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 224, 245, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 201, 154, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 227, 11, 84, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 231, 118, 72, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 15, 165, 212, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 159, 114, 78, 24, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 63, 122, 16, 243, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 127, 198, 164, 126, 141, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 192, 111, 242, 134, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 137, 93, 120, 69, 99, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 99, 167, 179, 182, 224, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 231, 137, 4, 35, 199, 138, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 15, 99, 45, 94, 199, 107, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 159, 222, 197, 173, 201, 53, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 63, 178, 186, 201, 224, 25, 30, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 127, 246, 74, 225, 199, 2, 45, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 160, 237, 204, 206, 27, 194, 211, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 73, 72, 1, 20, 22, 149, 69, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 227, 210, 12, 200, 220, 210, 183, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 231, 60, 128, 208, 159, 60, 46, 59, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 15, 97, 2, 37, 62, 94, 206, 79, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 159, 202, 23, 114, 109, 174, 15, 30, 67, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 63, 234, 237, 116, 70, 208, 156, 44, 159, 12, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 127, 38, 75, 145, 192, 34, 32, 190, 55, 126, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 128, 239, 172, 133, 91, 65, 109, 45, 238, 4, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 9, 91, 193, 56, 147, 141, 68, 198, 77, 49, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 99, 142, 141, 55, 192, 135, 173, 190, 9, 237, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 231, 143, 135, 43, 130, 77, 199, 114, 97, 66, 19, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 15, 159, 75, 179, 21, 7, 201, 123, 206, 151, 192, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 159, 54, 244, 0, 217, 70, 218, 213, 16, 238, 133, 7, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 63, 34, 138, 9, 122, 196, 134, 90, 168, 76, 59, 75, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 127, 86, 101, 95, 196, 172, 67, 137, 147, 254, 80, 240, 2, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 96, 245, 185, 171, 191, 164, 92, 195, 241, 41, 99, 29, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 201, 149, 67, 181, 124, 111, 158, 161, 113, 163, 223, - 37, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 227, 217, 163, 20, 223, 90, 48, 80, 112, 98, 188, 122, - 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 231, 130, 102, 206, 182, 140, 227, 33, 99, 216, 91, 203, - 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 15, 29, 1, 16, 36, 127, 227, 82, 223, 115, 150, 241, - 123, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 159, 34, 11, 160, 104, 247, 226, 60, 185, 134, 224, 111, - 215, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 63, 90, 111, 64, 22, 170, 221, 96, 60, 67, 197, 94, 106, - 192, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 127, 134, 89, 132, 222, 164, 168, 200, 91, 160, 180, - 179, 39, 132, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 64, 127, 43, 177, 112, 150, 214, 149, 67, 14, 5, - 141, 41, 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 137, 248, 178, 235, 102, 224, 97, 218, 163, 142, - 50, 130, 159, 215, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 99, 181, 253, 52, 5, 196, 210, 135, 102, 146, 249, - 21, 59, 108, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 231, 21, 233, 17, 52, 168, 59, 78, 1, 184, 191, - 219, 78, 58, 172, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 15, 219, 26, 179, 8, 146, 84, 14, 13, 48, 125, 149, - 20, 71, 186, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 159, 142, 12, 255, 86, 180, 77, 143, 130, 224, 227, - 214, 205, 198, 70, 11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 63, 146, 125, 246, 101, 11, 9, 153, 25, 197, 230, - 100, 10, 196, 195, 112, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 127, 182, 231, 160, 251, 113, 90, 250, 255, 178, 3, - 241, 103, 168, 165, 103, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 32, 13, 73, 212, 115, 136, 199, 255, 253, 36, - 106, 15, 148, 120, 12, 20, 4, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 73, 131, 218, 74, 134, 84, 203, 253, 235, 113, - 37, 154, 200, 181, 124, 200, 40, 0, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 227, 32, 137, 236, 62, 77, 241, 233, 55, 115, - 118, 5, 214, 25, 223, 212, 151, 1, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 231, 72, 91, 61, 117, 4, 109, 35, 47, 128, - 160, 54, 92, 2, 183, 80, 238, 15, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 15, 217, 144, 101, 148, 44, 66, 98, 215, 1, - 69, 34, 154, 23, 38, 39, 79, 159, 0, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 159, 122, 168, 247, 203, 189, 149, 214, 105, - 18, 178, 86, 5, 236, 124, 135, 23, 57, 6, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 63, 202, 148, 172, 247, 105, 217, 97, 34, 184, - 244, 98, 53, 56, 225, 74, 235, 58, 62, 0, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 127, 230, 207, 189, 172, 35, 126, 210, 87, 49, - 143, 221, 21, 50, 204, 236, 48, 77, 110, 2, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 0, 31, 106, 191, 100, 237, 56, 110, 237, - 151, 167, 218, 244, 249, 63, 233, 3, 79, 24, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 9, 54, 37, 122, 239, 69, 57, 78, 70, 239, - 139, 138, 144, 195, 127, 28, 39, 22, 243, 0, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 99, 28, 116, 197, 90, 187, 60, 14, 191, - 88, 119, 105, 165, 163, 253, 28, 135, 221, 126, 9, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 231, 27, 137, 182, 139, 81, 95, 142, 118, - 119, 169, 30, 118, 100, 232, 33, 71, 167, 244, 94, 0, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 15, 23, 91, 33, 117, 47, 185, 143, 161, - 170, 158, 50, 157, 236, 19, 83, 199, 136, 142, 181, 3, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 159, 230, 142, 77, 147, 218, 59, 157, 79, - 170, 50, 250, 35, 62, 199, 62, 201, 87, 145, 23, 37, 0, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 63, 2, 149, 7, 193, 137, 86, 36, 28, 167, - 250, 197, 103, 109, 200, 115, 220, 109, 173, 235, 114, 1, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 127, 22, 210, 75, 138, 97, 97, 107, 25, - 135, 202, 187, 13, 70, 212, 133, 156, 74, 198, 52, 125, 14, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 224, 52, 246, 102, 207, 205, 49, - 254, 70, 233, 85, 137, 188, 74, 58, 29, 234, 190, 15, 228, 144, 0, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 201, 16, 158, 5, 26, 10, 242, 237, - 197, 28, 91, 93, 93, 235, 70, 36, 37, 117, 157, 232, 168, 5, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 227, 167, 44, 56, 4, 101, 116, 75, - 187, 31, 143, 165, 165, 49, 197, 106, 115, 147, 38, 22, 153, 56, 0, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 231, 142, 190, 49, 42, 242, 139, - 242, 80, 61, 151, 119, 120, 240, 179, 43, 130, 194, 129, 221, 250, 53, 2, - ]), - arrow::datatypes::i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 15, 149, 113, 241, 165, 117, 119, - 121, 41, 101, 232, 171, 180, 100, 7, 181, 21, 153, 17, 167, 204, 27, 22, - ]), -]; - -/// The minimum `i256` value that can be stored in a `Decimal256` value of precision `p`. -/// -/// Remove this once is available -const MIN_DECIMAL256_FOR_EACH_PRECISION: [arrow::datatypes::i256; 77] = [ - arrow::datatypes::i256::from_i128(0_i128), // unused first element - arrow::datatypes::i256::from_le_bytes([ - 247, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 157, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 25, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 241, 216, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 97, 121, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 193, 189, 240, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 129, 105, 103, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 31, 10, 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 54, 101, 196, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 28, 244, 171, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 24, 137, 183, 232, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 240, 90, 43, 23, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 96, 141, 177, 231, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 192, 133, 239, 12, 165, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 128, 57, 91, 129, 114, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 63, 144, 13, 121, 220, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 118, 162, 135, 186, 156, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 156, 88, 76, 73, 31, 242, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 24, 118, 251, 220, 56, 117, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 240, 156, 210, 161, 56, 148, 250, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 96, 33, 58, 82, 54, 202, 201, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 192, 77, 69, 54, 31, 230, 225, 253, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 128, 9, 181, 30, 56, 253, 210, 234, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 95, 18, 51, 49, 228, 61, 44, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 182, 183, 254, 235, 233, 106, 186, 247, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 28, 45, 243, 55, 35, 45, 72, 173, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 24, 195, 127, 47, 96, 195, 209, 196, 252, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 240, 158, 253, 218, 193, 161, 49, 176, 223, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 96, 53, 232, 141, 146, 81, 240, 225, 188, 254, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 192, 21, 18, 139, 185, 47, 99, 211, 96, 243, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 128, 217, 180, 110, 63, 221, 223, 65, 200, 129, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 127, 16, 83, 122, 164, 190, 146, 210, 17, 251, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 246, 164, 62, 199, 108, 114, 187, 57, 178, 206, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 156, 113, 114, 200, 63, 120, 82, 65, 246, 18, 254, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 24, 112, 120, 212, 125, 178, 56, 141, 158, 189, 236, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 240, 96, 180, 76, 234, 248, 54, 132, 49, 104, 63, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 96, 201, 11, 255, 38, 185, 37, 42, 239, 17, 122, 248, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 192, 221, 117, 246, 133, 59, 121, 165, 87, 179, 196, 180, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 128, 169, 154, 160, 59, 83, 188, 118, 108, 1, 175, 15, 253, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 159, 10, 70, 84, 64, 91, 163, 60, 14, 214, 156, 226, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 54, 106, 188, 74, 131, 144, 97, 94, 142, 92, 32, 218, 254, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 28, 38, 92, 235, 32, 165, 207, 175, 143, 157, 67, 133, 244, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 24, 125, 153, 49, 73, 115, 28, 222, 156, 39, 164, 52, 141, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 240, 226, 254, 239, 219, 128, 28, 173, 32, 140, 105, 14, 132, 251, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 96, 221, 244, 95, 151, 8, 29, 195, 70, 121, 31, 144, 40, 211, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 192, 165, 144, 191, 233, 85, 34, 159, 195, 188, 58, 161, 149, 63, - 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 128, 121, 166, 123, 33, 91, 87, 55, 164, 95, 75, 76, 216, 123, - 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 191, 128, 212, 78, 143, 105, 41, 106, 188, 241, 250, 114, 214, - 80, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 118, 7, 77, 20, 153, 31, 158, 37, 92, 113, 205, 125, 96, 40, - 249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 156, 74, 2, 203, 250, 59, 45, 120, 153, 109, 6, 234, 196, 147, - 187, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 24, 234, 22, 238, 203, 87, 196, 177, 254, 71, 64, 36, 177, 197, - 83, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 240, 36, 229, 76, 247, 109, 171, 241, 242, 207, 130, 106, 235, - 184, 69, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 96, 113, 243, 0, 169, 75, 178, 112, 125, 31, 28, 41, 50, 57, - 185, 244, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 192, 109, 130, 9, 154, 244, 246, 102, 230, 58, 25, 155, 245, - 59, 60, 143, 245, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 128, 73, 24, 95, 4, 142, 165, 5, 0, 77, 252, 14, 152, 87, 90, - 152, 151, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 223, 242, 182, 43, 140, 119, 56, 0, 2, 219, 149, 240, 107, - 135, 243, 235, 251, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 182, 124, 37, 181, 121, 171, 52, 2, 20, 142, 218, 101, 55, - 74, 131, 55, 215, 255, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 28, 223, 118, 19, 193, 178, 14, 22, 200, 140, 137, 250, 41, - 230, 32, 43, 104, 254, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 24, 183, 164, 194, 138, 251, 146, 220, 208, 127, 95, 201, - 163, 253, 72, 175, 17, 240, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 240, 38, 111, 154, 107, 211, 189, 157, 40, 254, 186, 221, - 101, 232, 217, 216, 176, 96, 255, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 96, 133, 87, 8, 52, 66, 106, 41, 150, 237, 77, 169, 250, 19, - 131, 120, 232, 198, 249, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 192, 53, 107, 83, 8, 150, 38, 158, 221, 71, 11, 157, 202, - 199, 30, 181, 20, 197, 193, 255, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 128, 25, 48, 66, 83, 220, 129, 45, 168, 206, 112, 34, 234, - 205, 51, 19, 207, 178, 145, 253, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 255, 224, 149, 64, 155, 18, 199, 145, 18, 104, 88, 37, - 11, 6, 192, 22, 252, 176, 231, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 246, 201, 218, 133, 16, 186, 198, 177, 185, 16, 116, 117, - 111, 60, 128, 227, 216, 233, 12, 255, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 156, 227, 139, 58, 165, 68, 195, 241, 64, 167, 136, 150, - 90, 92, 2, 227, 120, 34, 129, 246, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 24, 228, 118, 73, 116, 174, 160, 113, 137, 136, 86, 225, - 137, 155, 23, 222, 184, 88, 11, 161, 255, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 240, 232, 164, 222, 138, 208, 70, 112, 94, 85, 97, 205, - 98, 19, 236, 172, 56, 119, 113, 74, 252, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 96, 25, 113, 178, 108, 37, 196, 98, 176, 85, 205, 5, 220, - 193, 56, 193, 54, 168, 110, 232, 218, 255, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 192, 253, 106, 248, 62, 118, 169, 219, 227, 88, 5, 58, - 152, 146, 55, 140, 35, 146, 82, 20, 141, 254, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 128, 233, 45, 180, 117, 158, 158, 148, 230, 120, 53, 68, - 242, 185, 43, 122, 99, 181, 57, 203, 130, 241, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 31, 203, 9, 153, 48, 50, 206, 1, 185, 22, 170, 118, - 67, 181, 197, 226, 21, 65, 240, 27, 111, 255, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 54, 239, 97, 250, 229, 245, 13, 18, 58, 227, 164, 162, - 162, 20, 185, 219, 218, 138, 98, 23, 87, 250, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 28, 88, 211, 199, 251, 154, 139, 180, 68, 224, 112, - 90, 90, 206, 58, 149, 140, 108, 217, 233, 102, 199, 255, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 24, 113, 65, 206, 213, 13, 116, 13, 175, 194, 104, - 136, 135, 15, 76, 212, 125, 61, 126, 34, 5, 202, 253, - ]), - arrow::datatypes::i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 240, 106, 142, 14, 90, 138, 136, 134, 214, 154, 23, - 84, 75, 155, 248, 74, 234, 102, 238, 88, 51, 228, 233, - ]), -]; - macro_rules! value_transition { ($bound:ident, $direction:expr, $value:expr) => { match $value { diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 571c17119427..a760a3a632bc 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -507,18 +507,19 @@ fn type_union_resolution_coercion( None } - let types = lhs + let coerced_types = lhs .iter() .map(|lhs_field| search_corresponding_coerced_type(lhs_field, rhs)) .collect::>>()?; - let fields = types + // preserve the field name and nullability + let orig_fields = std::iter::zip(lhs.iter(), rhs.iter()); + + let fields: Vec = coerced_types .into_iter() - .enumerate() - .map(|(i, datatype)| { - Arc::new(Field::new(format!("c{i}"), datatype, true)) - }) - .collect::>(); + .zip(orig_fields) + .map(|(datatype, (lhs, rhs))| coerce_fields(datatype, lhs, rhs)) + .collect(); Some(DataType::Struct(fields.into())) } _ => { @@ -684,8 +685,10 @@ fn string_numeric_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Some(Utf8), (LargeUtf8, _) if rhs_type.is_numeric() => Some(LargeUtf8), + (Utf8View, _) if rhs_type.is_numeric() => Some(Utf8View), (_, Utf8) if lhs_type.is_numeric() => Some(Utf8), (_, LargeUtf8) if lhs_type.is_numeric() => Some(LargeUtf8), + (_, Utf8View) if lhs_type.is_numeric() => Some(Utf8View), _ => None, } } diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index da5ec739ad8d..1dda843b040e 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -32,11 +32,11 @@ use arrow::array::{ }; use arrow::compute; use arrow::datatypes::{ - DataType, Decimal128Type, Decimal256Type, Float16Type, Float32Type, Float64Type, - Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, - UInt8Type, + DataType, Decimal128Type, Decimal256Type, DurationMicrosecondType, + DurationMillisecondType, DurationNanosecondType, DurationSecondType, Float16Type, + Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalUnit, + UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; -use arrow_schema::IntervalUnit; use datafusion_common::stats::Precision; use datafusion_common::{ downcast_value, exec_err, internal_err, ColumnStatistics, DataFusionError, Result, @@ -264,6 +264,7 @@ impl AggregateUDFImpl for Max { | Binary | LargeBinary | BinaryView + | Duration(_) ) } @@ -318,6 +319,18 @@ impl AggregateUDFImpl for Max { Timestamp(Nanosecond, _) => { primitive_max_accumulator!(data_type, i64, TimestampNanosecondType) } + Duration(Second) => { + primitive_max_accumulator!(data_type, i64, DurationSecondType) + } + Duration(Millisecond) => { + primitive_max_accumulator!(data_type, i64, DurationMillisecondType) + } + Duration(Microsecond) => { + primitive_max_accumulator!(data_type, i64, DurationMicrosecondType) + } + Duration(Nanosecond) => { + primitive_max_accumulator!(data_type, i64, DurationNanosecondType) + } Decimal128(_, _) => { primitive_max_accumulator!(data_type, i128, Decimal128Type) } @@ -1091,6 +1104,7 @@ impl AggregateUDFImpl for Min { | Binary | LargeBinary | BinaryView + | Duration(_) ) } @@ -1145,6 +1159,18 @@ impl AggregateUDFImpl for Min { Timestamp(Nanosecond, _) => { primitive_min_accumulator!(data_type, i64, TimestampNanosecondType) } + Duration(Second) => { + primitive_min_accumulator!(data_type, i64, DurationSecondType) + } + Duration(Millisecond) => { + primitive_min_accumulator!(data_type, i64, DurationMillisecondType) + } + Duration(Microsecond) => { + primitive_min_accumulator!(data_type, i64, DurationMicrosecondType) + } + Duration(Nanosecond) => { + primitive_min_accumulator!(data_type, i64, DurationNanosecondType) + } Decimal128(_, _) => { primitive_min_accumulator!(data_type, i128, Decimal128Type) } diff --git a/datafusion/functions/benches/regx.rs b/datafusion/functions/benches/regx.rs index 468d3d548bcf..1f99cc3a5f0b 100644 --- a/datafusion/functions/benches/regx.rs +++ b/datafusion/functions/benches/regx.rs @@ -18,7 +18,7 @@ extern crate criterion; use arrow::array::builder::StringBuilder; -use arrow::array::{ArrayRef, AsArray, Int64Array, StringArray}; +use arrow::array::{ArrayRef, AsArray, Int64Array, StringArray, StringViewArray}; use arrow::compute::cast; use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; @@ -141,6 +141,20 @@ fn criterion_benchmark(c: &mut Criterion) { }) }); + c.bench_function("regexp_like_1000 utf8view", |b| { + let mut rng = rand::thread_rng(); + let data = cast(&data(&mut rng), &DataType::Utf8View).unwrap(); + let regex = cast(®ex(&mut rng), &DataType::Utf8View).unwrap(); + let flags = cast(&flags(&mut rng), &DataType::Utf8View).unwrap(); + + b.iter(|| { + black_box( + regexp_like(&[Arc::clone(&data), Arc::clone(®ex), Arc::clone(&flags)]) + .expect("regexp_like should work on valid values"), + ) + }) + }); + c.bench_function("regexp_match_1000", |b| { let mut rng = rand::thread_rng(); let data = Arc::new(data(&mut rng)) as ArrayRef; @@ -149,7 +163,25 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - regexp_match::(&[ + regexp_match(&[ + Arc::clone(&data), + Arc::clone(®ex), + Arc::clone(&flags), + ]) + .expect("regexp_match should work on valid values"), + ) + }) + }); + + c.bench_function("regexp_match_1000 utf8view", |b| { + let mut rng = rand::thread_rng(); + let data = cast(&data(&mut rng), &DataType::Utf8View).unwrap(); + let regex = cast(®ex(&mut rng), &DataType::Utf8View).unwrap(); + let flags = cast(&flags(&mut rng), &DataType::Utf8View).unwrap(); + + b.iter(|| { + black_box( + regexp_match(&[ Arc::clone(&data), Arc::clone(®ex), Arc::clone(&flags), @@ -180,6 +212,29 @@ fn criterion_benchmark(c: &mut Criterion) { ) }) }); + + c.bench_function("regexp_replace_1000 utf8view", |b| { + let mut rng = rand::thread_rng(); + let data = cast(&data(&mut rng), &DataType::Utf8View).unwrap(); + let regex = cast(®ex(&mut rng), &DataType::Utf8View).unwrap(); + // flags are not allowed to be utf8view according to the function + let flags = Arc::new(flags(&mut rng)) as ArrayRef; + let replacement = Arc::new(StringViewArray::from_iter_values( + iter::repeat("XX").take(1000), + )); + + b.iter(|| { + black_box( + regexp_replace::( + data.as_string_view(), + regex.as_string_view(), + &replacement, + Some(&flags), + ) + .expect("regexp_replace should work on valid values"), + ) + }) + }); } criterion_group!(benches, criterion_benchmark); diff --git a/datafusion/functions/src/regex/regexpmatch.rs b/datafusion/functions/src/regex/regexpmatch.rs index 06b9a9d98b47..57207ecfdacd 100644 --- a/datafusion/functions/src/regex/regexpmatch.rs +++ b/datafusion/functions/src/regex/regexpmatch.rs @@ -16,16 +16,14 @@ // under the License. //! Regex expressions -use arrow::array::{Array, ArrayRef, OffsetSizeTrait}; +use arrow::array::{Array, ArrayRef, AsArray}; use arrow::compute::kernels::regexp; use arrow::datatypes::DataType; use arrow::datatypes::Field; use datafusion_common::exec_err; use datafusion_common::ScalarValue; use datafusion_common::{arrow_datafusion_err, plan_err}; -use datafusion_common::{ - cast::as_generic_string_array, internal_err, DataFusionError, Result, -}; +use datafusion_common::{DataFusionError, Result}; use datafusion_expr::{ColumnarValue, Documentation, TypeSignature}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; use datafusion_macros::user_doc; @@ -86,11 +84,12 @@ impl RegexpMatchFunc { signature: Signature::one_of( vec![ // Planner attempts coercion to the target type starting with the most preferred candidate. - // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8, Utf8)`. - // If that fails, it proceeds to `(LargeUtf8, Utf8)`. - // TODO: Native support Utf8View for regexp_match. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. + TypeSignature::Exact(vec![Utf8View, Utf8View]), TypeSignature::Exact(vec![Utf8, Utf8]), TypeSignature::Exact(vec![LargeUtf8, LargeUtf8]), + TypeSignature::Exact(vec![Utf8View, Utf8View, Utf8View]), TypeSignature::Exact(vec![Utf8, Utf8, Utf8]), TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]), ], @@ -138,7 +137,7 @@ impl ScalarUDFImpl for RegexpMatchFunc { .map(|arg| arg.to_array(inferred_length)) .collect::>>()?; - let result = regexp_match_func(&args); + let result = regexp_match(&args); if is_scalar { // If all inputs are scalar, keeps output as scalar let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); @@ -153,33 +152,35 @@ impl ScalarUDFImpl for RegexpMatchFunc { } } -fn regexp_match_func(args: &[ArrayRef]) -> Result { - match args[0].data_type() { - DataType::Utf8 => regexp_match::(args), - DataType::LargeUtf8 => regexp_match::(args), - other => { - internal_err!("Unsupported data type {other:?} for function regexp_match") - } - } -} -pub fn regexp_match(args: &[ArrayRef]) -> Result { +pub fn regexp_match(args: &[ArrayRef]) -> Result { match args.len() { 2 => { - let values = as_generic_string_array::(&args[0])?; - let regex = as_generic_string_array::(&args[1])?; - regexp::regexp_match(values, regex, None) + regexp::regexp_match(&args[0], &args[1], None) .map_err(|e| arrow_datafusion_err!(e)) } 3 => { - let values = as_generic_string_array::(&args[0])?; - let regex = as_generic_string_array::(&args[1])?; - let flags = as_generic_string_array::(&args[2])?; - - if flags.iter().any(|s| s == Some("g")) { - return plan_err!("regexp_match() does not support the \"global\" option"); + match args[2].data_type() { + DataType::Utf8View => { + if args[2].as_string_view().iter().any(|s| s == Some("g")) { + return plan_err!("regexp_match() does not support the \"global\" option"); + } + } + DataType::Utf8 => { + if args[2].as_string::().iter().any(|s| s == Some("g")) { + return plan_err!("regexp_match() does not support the \"global\" option"); + } + } + DataType::LargeUtf8 => { + if args[2].as_string::().iter().any(|s| s == Some("g")) { + return plan_err!("regexp_match() does not support the \"global\" option"); + } + } + e => { + return plan_err!("regexp_match was called with unexpected data type {e:?}"); + } } - regexp::regexp_match(values, regex, Some(flags)) + regexp::regexp_match(&args[0], &args[1], Some(&args[2])) .map_err(|e| arrow_datafusion_err!(e)) } other => exec_err!( @@ -211,7 +212,7 @@ mod tests { expected_builder.append(false); let expected = expected_builder.finish(); - let re = regexp_match::(&[Arc::new(values), Arc::new(patterns)]).unwrap(); + let re = regexp_match(&[Arc::new(values), Arc::new(patterns)]).unwrap(); assert_eq!(re.as_ref(), &expected); } @@ -236,9 +237,8 @@ mod tests { expected_builder.append(false); let expected = expected_builder.finish(); - let re = - regexp_match::(&[Arc::new(values), Arc::new(patterns), Arc::new(flags)]) - .unwrap(); + let re = regexp_match(&[Arc::new(values), Arc::new(patterns), Arc::new(flags)]) + .unwrap(); assert_eq!(re.as_ref(), &expected); } @@ -250,7 +250,7 @@ mod tests { let flags = StringArray::from(vec!["g"]); let re_err = - regexp_match::(&[Arc::new(values), Arc::new(patterns), Arc::new(flags)]) + regexp_match(&[Arc::new(values), Arc::new(patterns), Arc::new(flags)]) .expect_err("unsupported flag should have failed"); assert_eq!(re_err.strip_backtrace(), "Error during planning: regexp_match() does not support the \"global\" option"); diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 48a5e2f9a07c..7a41f54c56e1 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -190,7 +190,15 @@ impl<'a> TypeCoercionRewriter<'a> { .map(|(lhs, rhs)| { // coerce the arguments as though they were a single binary equality // expression - let (lhs, rhs) = self.coerce_binary_op(lhs, Operator::Eq, rhs)?; + let left_schema = join.left.schema(); + let right_schema = join.right.schema(); + let (lhs, rhs) = self.coerce_binary_op( + lhs, + left_schema, + Operator::Eq, + rhs, + right_schema, + )?; Ok((lhs, rhs)) }) .collect::>>()?; @@ -275,17 +283,19 @@ impl<'a> TypeCoercionRewriter<'a> { fn coerce_binary_op( &self, left: Expr, + left_schema: &DFSchema, op: Operator, right: Expr, + right_schema: &DFSchema, ) -> Result<(Expr, Expr)> { let (left_type, right_type) = get_input_types( - &left.get_type(self.schema)?, + &left.get_type(left_schema)?, &op, - &right.get_type(self.schema)?, + &right.get_type(right_schema)?, )?; Ok(( - left.cast_to(&left_type, self.schema)?, - right.cast_to(&right_type, self.schema)?, + left.cast_to(&left_type, left_schema)?, + right.cast_to(&right_type, right_schema)?, )) } } @@ -404,7 +414,8 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> { )))) } Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - let (left, right) = self.coerce_binary_op(*left, op, *right)?; + let (left, right) = + self.coerce_binary_op(*left, self.schema, op, *right, self.schema)?; Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr::new( Box::new(left), op, diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index 892d450ba85b..e2b8a966cb92 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -26,7 +26,8 @@ use crate::{OptimizerConfig, OptimizerRule}; use crate::utils::NamePreserver; use arrow::datatypes::{ - DataType, TimeUnit, MAX_DECIMAL_FOR_EACH_PRECISION, MIN_DECIMAL_FOR_EACH_PRECISION, + DataType, TimeUnit, MAX_DECIMAL128_FOR_EACH_PRECISION, + MIN_DECIMAL128_FOR_EACH_PRECISION, }; use arrow::temporal_conversions::{MICROSECONDS, MILLISECONDS, NANOSECONDS}; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; @@ -369,8 +370,8 @@ fn try_cast_numeric_literal( // Different precision for decimal128 can store different range of value. // For example, the precision is 3, the max of value is `999` and the min // value is `-999` - MIN_DECIMAL_FOR_EACH_PRECISION[*precision as usize - 1], - MAX_DECIMAL_FOR_EACH_PRECISION[*precision as usize - 1], + MIN_DECIMAL128_FOR_EACH_PRECISION[*precision as usize], + MAX_DECIMAL128_FOR_EACH_PRECISION[*precision as usize], ), _ => return None, }; diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index c8c544785d6d..f181c9bd2cfd 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -3807,6 +3807,52 @@ SELECT MIN(value), MAX(value) FROM timestampmicrosecond statement ok DROP TABLE timestampmicrosecond; +# min_duration, max_duration +statement ok +create table d +as values + (arrow_cast(1, 'Duration(Second)'), arrow_cast(2, 'Duration(Millisecond)'), arrow_cast(3, 'Duration(Microsecond)'), arrow_cast(4, 'Duration(Nanosecond)'), 1), + (arrow_cast(11, 'Duration(Second)'),arrow_cast(22, 'Duration(Millisecond)'), arrow_cast(33, 'Duration(Microsecond)'), arrow_cast(44, 'Duration(Nanosecond)'), 1); + +query ???? +SELECT min(column1), min(column2), min(column3), min(column4) FROM d; +---- +0 days 0 hours 0 mins 1 secs 0 days 0 hours 0 mins 0.002 secs 0 days 0 hours 0 mins 0.000003 secs 0 days 0 hours 0 mins 0.000000004 secs + +query ???? +SELECT max(column1), max(column2), max(column3), max(column4) FROM d; +---- +0 days 0 hours 0 mins 11 secs 0 days 0 hours 0 mins 0.022 secs 0 days 0 hours 0 mins 0.000033 secs 0 days 0 hours 0 mins 0.000000044 secs + +# GROUP BY follows a different code path +query ????I +SELECT min(column1), min(column2), min(column3), min(column4), column5 FROM d GROUP BY column5; +---- +0 days 0 hours 0 mins 1 secs 0 days 0 hours 0 mins 0.002 secs 0 days 0 hours 0 mins 0.000003 secs 0 days 0 hours 0 mins 0.000000004 secs 1 + +query ????I +SELECT max(column1), max(column2), max(column3), max(column4), column5 FROM d GROUP BY column5; +---- +0 days 0 hours 0 mins 11 secs 0 days 0 hours 0 mins 0.022 secs 0 days 0 hours 0 mins 0.000033 secs 0 days 0 hours 0 mins 0.000000044 secs 1 + +statement ok +INSERT INTO d VALUES + (arrow_cast(3, 'Duration(Second)'), arrow_cast(1, 'Duration(Millisecond)'), arrow_cast(7, 'Duration(Microsecond)'), arrow_cast(2, 'Duration(Nanosecond)'), 1), + (arrow_cast(0, 'Duration(Second)'), arrow_cast(9, 'Duration(Millisecond)'), arrow_cast(5, 'Duration(Microsecond)'), arrow_cast(8, 'Duration(Nanosecond)'), 1); + +query ????I +SELECT max(column1), max(column2), max(column3), max(column4), column5 FROM d GROUP BY column5 ORDER BY column5; +---- +0 days 0 hours 0 mins 11 secs 0 days 0 hours 0 mins 0.022 secs 0 days 0 hours 0 mins 0.000033 secs 0 days 0 hours 0 mins 0.000000044 secs 1 + +query ????I +SELECT min(column1), min(column2), min(column3), min(column4), column5 FROM d GROUP BY column5 ORDER BY column5; +---- +0 days 0 hours 0 mins 0 secs 0 days 0 hours 0 mins 0.001 secs 0 days 0 hours 0 mins 0.000003 secs 0 days 0 hours 0 mins 0.000000002 secs 1 + +statement ok +drop table d; + # max_bool statement ok CREATE TABLE max_bool (value BOOLEAN); diff --git a/datafusion/sqllogictest/test_files/case.slt b/datafusion/sqllogictest/test_files/case.slt index 46e9c86c7591..8e470fe988d3 100644 --- a/datafusion/sqllogictest/test_files/case.slt +++ b/datafusion/sqllogictest/test_files/case.slt @@ -416,5 +416,58 @@ SELECT end FROM t; +statement ok +drop table t + +# Fix coercion of lists of structs +# https://github.com/apache/datafusion/issues/14154 + +statement ok +create or replace table t as values +( + 100, -- column1 int (so the case isn't constant folded) + [{ 'foo': arrow_cast('baz', 'Utf8View') }], -- column2 has List of Struct w/ Utf8View + [{ 'foo': 'bar' }], -- column3 has List of Struct w/ Utf8 + [{ 'foo': 'blarg' }] -- column4 has List of Struct w/ Utf8 +); + +# This case forces all branches to be coerced to the same type +query ? +SELECT + case + when column1 > 0 then column2 + when column1 < 0 then column3 + else column4 + end +FROM t; +---- +[{foo: baz}] + +# different orders of the branches +query ? +SELECT + case + when column1 > 0 then column3 -- NB different order + when column1 < 0 then column4 + else column2 + end +FROM t; +---- +[{foo: bar}] + +# different orders of the branches +query ? +SELECT + case + when column1 > 0 then column4 -- NB different order + when column1 < 0 then column2 + else column3 + end +FROM t; +---- +[{foo: blarg}] + + + statement ok drop table t diff --git a/datafusion/sqllogictest/test_files/regexp.slt b/datafusion/sqllogictest/test_files/regexp.slt index 800026dd766d..80f94e21d1fe 100644 --- a/datafusion/sqllogictest/test_files/regexp.slt +++ b/datafusion/sqllogictest/test_files/regexp.slt @@ -193,6 +193,29 @@ NULL [Köln] [إسرائيل] +# test string view +statement ok +CREATE TABLE t_stringview AS +SELECT arrow_cast(str, 'Utf8View') as str, arrow_cast(pattern, 'Utf8View') as pattern, arrow_cast(flags, 'Utf8View') as flags FROM t; + +query ? +SELECT regexp_match(str, pattern, flags) FROM t_stringview; +---- +[a] +[A] +[B] +NULL +NULL +NULL +[010] +[Düsseldorf] +[Москва] +[Köln] +[إسرائيل] + +statement ok +DROP TABLE t_stringview; + query ? SELECT regexp_match('foobarbequebaz', ''); ---- @@ -354,6 +377,29 @@ X X X +# test string view +statement ok +CREATE TABLE t_stringview AS +SELECT arrow_cast(str, 'Utf8View') as str, arrow_cast(pattern, 'Utf8View') as pattern, arrow_cast(flags, 'Utf8View') as flags FROM t; + +query T +SELECT regexp_replace(str, pattern, 'X', concat('g', flags)) FROM t_stringview; +---- +Xbc +X +aXc +AbC +aBC +4000 +X +X +X +X +X + +statement ok +DROP TABLE t_stringview; + query T SELECT regexp_replace('ABCabcABC', '(abc)', 'X', 'gi'); ---- @@ -621,7 +667,7 @@ CREATE TABLE t_stringview AS SELECT arrow_cast(str, 'Utf8View') as str, arrow_cast(pattern, 'Utf8View') as pattern, arrow_cast(start, 'Int64') as start, arrow_cast(flags, 'Utf8View') as flags FROM t; query I -SELECT regexp_count(str, '\w') from t; +SELECT regexp_count(str, '\w') from t_stringview; ---- 3 3 @@ -636,7 +682,7 @@ SELECT regexp_count(str, '\w') from t; 7 query I -SELECT regexp_count(str, '\w{2}', start) from t; +SELECT regexp_count(str, '\w{2}', start) from t_stringview; ---- 1 1 @@ -651,7 +697,7 @@ SELECT regexp_count(str, '\w{2}', start) from t; 3 query I -SELECT regexp_count(str, 'ab', 1, 'i') from t; +SELECT regexp_count(str, 'ab', 1, 'i') from t_stringview; ---- 1 1 @@ -667,7 +713,7 @@ SELECT regexp_count(str, 'ab', 1, 'i') from t; query I -SELECT regexp_count(str, pattern) from t; +SELECT regexp_count(str, pattern) from t_stringview; ---- 1 1 @@ -682,7 +728,7 @@ SELECT regexp_count(str, pattern) from t; 1 query I -SELECT regexp_count(str, pattern, start) from t; +SELECT regexp_count(str, pattern, start) from t_stringview; ---- 1 1 @@ -697,7 +743,7 @@ SELECT regexp_count(str, pattern, start) from t; 1 query I -SELECT regexp_count(str, pattern, start, flags) from t; +SELECT regexp_count(str, pattern, start, flags) from t_stringview; ---- 1 1 @@ -713,7 +759,7 @@ SELECT regexp_count(str, pattern, start, flags) from t; # test type coercion query I -SELECT regexp_count(arrow_cast(str, 'Utf8'), arrow_cast(pattern, 'LargeUtf8'), arrow_cast(start, 'Int32'), flags) from t; +SELECT regexp_count(arrow_cast(str, 'Utf8'), arrow_cast(pattern, 'LargeUtf8'), arrow_cast(start, 'Int32'), flags) from t_stringview; ---- 1 1 diff --git a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt index 01071f03dce6..2f12e9c7a39b 100644 --- a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt +++ b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt @@ -34,6 +34,15 @@ statement ok create table test_substr as select arrow_cast(col1, 'Dictionary(Int32, Utf8)') as c1 from test_substr_base; +statement ok +create table test_datetime as +select + arrow_cast(column1, 'Dictionary(Int32, Utf8)') as ts, + arrow_cast(column2, 'Dictionary(Int32, Utf8)') as d, + arrow_cast(column3, 'Dictionary(Int32, Utf8)') as t +from test_datetime_base; + + statement ok drop table test_source @@ -56,3 +65,6 @@ drop table test_basic_operator; statement ok drop table test_substr_base; + +statement ok +drop table test_datetime_base; \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/string/init_data.slt.part b/datafusion/sqllogictest/test_files/string/init_data.slt.part index 06b65ff8e72a..7799dd605b90 100644 --- a/datafusion/sqllogictest/test_files/string/init_data.slt.part +++ b/datafusion/sqllogictest/test_files/string/init_data.slt.part @@ -37,3 +37,14 @@ statement ok create table test_substr_base ( col1 VARCHAR ) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'), (''), (NULL); + + +# -------------------------------------- +# Setup test tables with date/time values to test coercion +# -------------------------------------- +statement ok +create table test_datetime_base as values + ('2024-08-09T12:13:14', '2024-08-09', '12:13:14'), + ('2024-08-09T12:13:15', '2024-09-09', '12:14:14'), + (NULL, NULL, NULL) +; diff --git a/datafusion/sqllogictest/test_files/string/large_string.slt b/datafusion/sqllogictest/test_files/string/large_string.slt index 84f1e8382e53..93ec796ec6f0 100644 --- a/datafusion/sqllogictest/test_files/string/large_string.slt +++ b/datafusion/sqllogictest/test_files/string/large_string.slt @@ -34,6 +34,15 @@ statement ok create table test_substr as select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base; +statement ok +create table test_datetime as +select + arrow_cast(column1, 'LargeUtf8') as ts, + arrow_cast(column2, 'LargeUtf8') as d, + arrow_cast(column3, 'LargeUtf8') as t +from test_datetime_base; + + # select query TTTT SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator @@ -64,3 +73,6 @@ drop table test_basic_operator; statement ok drop table test_substr_base; + +statement ok +drop table test_datetime_base; \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/string/string.slt b/datafusion/sqllogictest/test_files/string/string.slt index 55f0c034f5f9..d724e672e0fc 100644 --- a/datafusion/sqllogictest/test_files/string/string.slt +++ b/datafusion/sqllogictest/test_files/string/string.slt @@ -34,6 +34,13 @@ statement ok create table test_substr as select arrow_cast(col1, 'Utf8') as c1 from test_substr_base; +statement ok +create table test_datetime as +select + arrow_cast(column1, 'Utf8') as ts, + arrow_cast(column2, 'Utf8') as d, + arrow_cast(column3, 'Utf8') as t +from test_datetime_base; # @@ -186,3 +193,6 @@ drop table test_basic_operator; statement ok drop table test_substr; + +statement ok +drop table test_datetime; diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part b/datafusion/sqllogictest/test_files/string/string_query.slt.part index 2414e5864c99..a2806859b5ba 100644 --- a/datafusion/sqllogictest/test_files/string/string_query.slt.part +++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part @@ -19,6 +19,10 @@ # with standard values, but different types in string columns # (String, StringView, etc.) +# -------------------------------------- +# Show the input data +# -------------------------------------- + # select query TTTT SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator @@ -35,6 +39,49 @@ _ \_ (empty) (empty) NULL % NULL NULL NULL R NULL 🔥 +# -------------------------------------- +# test type coercion (compare to int) +# queries should not error +# -------------------------------------- + +query BB +select ascii_1 = 1 as col1, 1 = ascii_1 as col2 from test_basic_operator; +---- +false false +false false +false false +false false +false false +false false +false false +false false +false false +NULL NULL +NULL NULL + +query BB +select ascii_1 <> 1 as col1, 1 <> ascii_1 as col2 from test_basic_operator; +---- +true true +true true +true true +true true +true true +true true +true true +true true +true true +NULL NULL +NULL NULL + +# Coercion to date/time +query BBB +select ts = '2024-08-09T12:13:14'::timestamp, d = '2024-08-08'::date, t = '12:13:14'::time from test_datetime; +---- +true false true +false false false +NULL NULL NULL + # -------------------------------------- # column comparison as filters # -------------------------------------- diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index 435b4bc3c5a8..c54e2aa7002c 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -34,6 +34,14 @@ statement ok create table test_substr as select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base; +statement ok +create table test_datetime as +select + arrow_cast(column1, 'Utf8View') as ts, + arrow_cast(column2, 'Utf8View') as d, + arrow_cast(column3, 'Utf8View') as t +from test_datetime_base; + statement ok drop table test_source @@ -51,6 +59,9 @@ drop table test_basic_operator; statement ok drop table test_substr_base; +statement ok +drop table test_datetime_base; + # -------------------------------------- # String_view specific tests @@ -783,7 +794,7 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: regexp_match(CAST(test.column1_utf8view AS Utf8), Utf8("^https?://(?:www\.)?([^/]+)/.*$")) AS k +01)Projection: regexp_match(test.column1_utf8view, Utf8View("^https?://(?:www\.)?([^/]+)/.*$")) AS k 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for REGEXP_REPLACE diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index d671798b7d0f..0afe39de1795 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -459,14 +459,14 @@ create table t as values({r: 'a', c: 1}), ({r: 'b', c: 2.3}); query ? select * from t; ---- -{c0: a, c1: 1.0} -{c0: b, c1: 2.3} +{r: a, c: 1.0} +{r: b, c: 2.3} query T select arrow_typeof(column1) from t; ---- -Struct([Field { name: "c0", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c1", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) -Struct([Field { name: "c0", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c1", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) +Struct([Field { name: "r", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) +Struct([Field { name: "r", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) statement ok drop table t; diff --git a/dev/changelog/45.0.0.md b/dev/changelog/45.0.0.md index 2303eee92a1d..ca905c0a1a56 100644 --- a/dev/changelog/45.0.0.md +++ b/dev/changelog/45.0.0.md @@ -19,7 +19,7 @@ under the License. # Apache DataFusion 45.0.0 Changelog -This release consists of 252 commits from 83 contributors. See credits at the end of this changelog for more information. +This release consists of 258 commits from 83 contributors. See credits at the end of this changelog for more information. **Breaking changes:** @@ -94,6 +94,7 @@ This release consists of 252 commits from 83 contributors. See credits at the en - Support arrays_overlap function (alias of `array_has_any`) [#14217](https://github.com/apache/datafusion/pull/14217) (erenavsarogullari) - chore: Adding commit activity badge [#14386](https://github.com/apache/datafusion/pull/14386) (comphead) - docs: Clarify join behavior in `DataFrame::join` [#14393](https://github.com/apache/datafusion/pull/14393) (rkrishn7) +- Prepare for `45.0.0` release: Version and Changelog [#14397](https://github.com/apache/datafusion/pull/14397) (alamb) **Other:** @@ -290,13 +291,18 @@ This release consists of 252 commits from 83 contributors. See credits at the en - FFI support for versions and alternate tokio runtimes [#13937](https://github.com/apache/datafusion/pull/13937) (timsaucer) - Do not rename struct fields when coercing types in `CASE` [#14384](https://github.com/apache/datafusion/pull/14384) (alamb) - Add `TableProvider::insert_into` into FFI Bindings [#14391](https://github.com/apache/datafusion/pull/14391) (davisp) +- [branch-45]: Backport chore: Upgrade to `arrow`/`parquet` `54.1.0` and fix clippy/ci (#14415) [#14453](https://github.com/apache/datafusion/pull/14453) (alamb) +- [release-45] Fix join type coercion (#14387) [#14454](https://github.com/apache/datafusion/pull/14454) (alamb) +- [branch-45] Support `Utf8View` to `numeric` coercion (#14377) [#14455](https://github.com/apache/datafusion/pull/14455) (alamb) +- [branch-45] Update REGEXP_MATCH scalar function to support Utf8View (#14449) [#14457](https://github.com/apache/datafusion/pull/14457) (alamb) +- [branch-45] Fix regression list Type Coercion List with inner type struct which has large/view types (#14385) [#14456](https://github.com/apache/datafusion/pull/14456) (alamb) ## Credits Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. ``` - 46 Andrew Lamb + 52 Andrew Lamb 22 Ian Lai 20 dependabot[bot] 8 Bruce Ritchie