diff --git a/arrow-avro/Cargo.toml b/arrow-avro/Cargo.toml index 0264b749729b..975bcbe5e8ff 100644 --- a/arrow-avro/Cargo.toml +++ b/arrow-avro/Cargo.toml @@ -25,7 +25,7 @@ authors = { workspace = true } license = { workspace = true } keywords = { workspace = true } include = { workspace = true } -edition = { workspace = true } +edition = "2024" rust-version = { workspace = true } [lib] @@ -93,4 +93,4 @@ harness = false [[bench]] name = "avro_writer" -harness = false \ No newline at end of file +harness = false diff --git a/arrow-avro/benches/avro_writer.rs b/arrow-avro/benches/avro_writer.rs index 085ec66c1fb6..57ab48663840 100644 --- a/arrow-avro/benches/avro_writer.rs +++ b/arrow-avro/benches/avro_writer.rs @@ -22,21 +22,21 @@ extern crate criterion; extern crate once_cell; use arrow_array::{ + ArrayRef, BinaryArray, BooleanArray, Decimal32Array, Decimal64Array, Decimal128Array, + Decimal256Array, FixedSizeBinaryArray, Float32Array, Float64Array, ListArray, PrimitiveArray, + RecordBatch, StringArray, StructArray, builder::{ListBuilder, StringBuilder}, types::{Int32Type, Int64Type, IntervalMonthDayNanoType, TimestampMicrosecondType}, - ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Decimal256Array, Decimal32Array, - Decimal64Array, FixedSizeBinaryArray, Float32Array, Float64Array, ListArray, PrimitiveArray, - RecordBatch, StringArray, StructArray, }; use arrow_avro::writer::AvroWriter; -use arrow_buffer::{i256, Buffer}; +use arrow_buffer::{Buffer, i256}; use arrow_schema::{DataType, Field, IntervalUnit, Schema, TimeUnit, UnionFields, UnionMode}; -use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput}; +use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; use once_cell::sync::Lazy; use rand::{ + Rng, SeedableRng, distr::uniform::{SampleRange, SampleUniform}, rngs::StdRng, - Rng, SeedableRng, }; use std::collections::HashMap; use std::io::Cursor; diff --git a/arrow-avro/benches/decoder.rs b/arrow-avro/benches/decoder.rs index 5ab0f847efcc..5db99b4db7ec 100644 --- a/arrow-avro/benches/decoder.rs +++ b/arrow-avro/benches/decoder.rs @@ -26,10 +26,10 @@ extern crate once_cell; extern crate uuid; use apache_avro::types::Value; -use apache_avro::{to_avro_datum, Decimal, Schema as ApacheSchema}; -use arrow_avro::schema::{Fingerprint, FingerprintAlgorithm, CONFLUENT_MAGIC, SINGLE_OBJECT_MAGIC}; +use apache_avro::{Decimal, Schema as ApacheSchema, to_avro_datum}; +use arrow_avro::schema::{CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, SINGLE_OBJECT_MAGIC}; use arrow_avro::{reader::ReaderBuilder, schema::AvroSchema}; -use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput}; +use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; use once_cell::sync::Lazy; use std::{hint::black_box, time::Duration}; use uuid::Uuid; diff --git a/arrow-avro/examples/decode_kafka_stream.rs b/arrow-avro/examples/decode_kafka_stream.rs index f5b0f2e6575b..84ea3d39319e 100644 --- a/arrow-avro/examples/decode_kafka_stream.rs +++ b/arrow-avro/examples/decode_kafka_stream.rs @@ -35,7 +35,7 @@ use arrow_array::{Int64Array, RecordBatch, StringArray}; use arrow_avro::reader::ReaderBuilder; use arrow_avro::schema::{ - AvroSchema, Fingerprint, FingerprintAlgorithm, SchemaStore, CONFLUENT_MAGIC, + AvroSchema, CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, SchemaStore, }; use arrow_schema::ArrowError; diff --git a/arrow-avro/src/codec.rs b/arrow-avro/src/codec.rs index 0de010cd6b67..f946a5a1b95a 100644 --- a/arrow-avro/src/codec.rs +++ b/arrow-avro/src/codec.rs @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. use crate::schema::{ - make_full_name, Array, Attributes, ComplexType, Enum, Fixed, Map, Nullability, PrimitiveType, - Record, Schema, Type, TypeName, AVRO_ENUM_SYMBOLS_METADATA_KEY, - AVRO_FIELD_DEFAULT_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY, AVRO_NAME_METADATA_KEY, + AVRO_ENUM_SYMBOLS_METADATA_KEY, AVRO_FIELD_DEFAULT_METADATA_KEY, AVRO_NAME_METADATA_KEY, + AVRO_NAMESPACE_METADATA_KEY, Array, Attributes, ComplexType, Enum, Fixed, Map, Nullability, + PrimitiveType, Record, Schema, Type, TypeName, make_full_name, }; use arrow_schema::{ - ArrowError, DataType, Field, Fields, IntervalUnit, TimeUnit, UnionFields, UnionMode, - DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, + ArrowError, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DataType, Field, Fields, + IntervalUnit, TimeUnit, UnionFields, UnionMode, }; #[cfg(feature = "small_decimals")] use arrow_schema::{DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION}; @@ -324,14 +324,14 @@ impl AvroDataType { Codec::Null => { return Err(ArrowError::SchemaError( "Default for `null` type must be JSON null".to_string(), - )) + )); } Codec::Boolean => match default_json { Value::Bool(b) => AvroLiteral::Boolean(*b), _ => { return Err(ArrowError::SchemaError( "Boolean default must be a JSON boolean".to_string(), - )) + )); } }, Codec::Int32 | Codec::Date32 | Codec::TimeMillis => { @@ -393,7 +393,7 @@ impl AvroDataType { _ => { return Err(ArrowError::SchemaError( "Default value must be a JSON array for Avro array type".to_string(), - )) + )); } }, Codec::Map(val_dt) => match default_json { @@ -407,7 +407,7 @@ impl AvroDataType { _ => { return Err(ArrowError::SchemaError( "Default value must be a JSON object for Avro map type".to_string(), - )) + )); } }, Codec::Struct(fields) => match default_json { @@ -449,7 +449,7 @@ impl AvroDataType { _ => { return Err(ArrowError::SchemaError( "Default value for record/struct must be a JSON object".to_string(), - )) + )); } }, Codec::Union(encodings, _, _) => { @@ -1622,7 +1622,7 @@ impl<'a> Maker<'a> { _ => { return Err(ArrowError::ParseError(format!( "Illegal promotion {write_primitive:?} to {read_primitive:?}" - ))) + ))); } }; let mut datatype = self.parse_type(reader_schema, None)?; @@ -1894,8 +1894,8 @@ impl<'a> Maker<'a> { mod tests { use super::*; use crate::schema::{ - Array, Attributes, ComplexType, Field as AvroFieldSchema, Fixed, PrimitiveType, Record, - Schema, Type, TypeName, AVRO_ROOT_RECORD_DEFAULT_NAME, + AVRO_ROOT_RECORD_DEFAULT_NAME, Array, Attributes, ComplexType, Field as AvroFieldSchema, + Fixed, PrimitiveType, Record, Schema, Type, TypeName, }; use indexmap::IndexMap; use serde_json::{self, Value}; diff --git a/arrow-avro/src/reader/header.rs b/arrow-avro/src/reader/header.rs index 240536c311e5..aac267f50e9e 100644 --- a/arrow-avro/src/reader/header.rs +++ b/arrow-avro/src/reader/header.rs @@ -17,9 +17,9 @@ //! Decoder for [`Header`] -use crate::compression::{CompressionCodec, CODEC_METADATA_KEY}; +use crate::compression::{CODEC_METADATA_KEY, CompressionCodec}; use crate::reader::vlq::VLQDecoder; -use crate::schema::{Schema, SCHEMA_METADATA_KEY}; +use crate::schema::{SCHEMA_METADATA_KEY, Schema}; use arrow_schema::ArrowError; use std::io::BufRead; diff --git a/arrow-avro/src/reader/mod.rs b/arrow-avro/src/reader/mod.rs index f14467e86e7a..def0fffe39fb 100644 --- a/arrow-avro/src/reader/mod.rs +++ b/arrow-avro/src/reader/mod.rs @@ -477,8 +477,8 @@ use crate::codec::AvroFieldBuilder; use crate::reader::header::read_header; use crate::schema::{ - AvroSchema, Fingerprint, FingerprintAlgorithm, Schema, SchemaStore, CONFLUENT_MAGIC, - SINGLE_OBJECT_MAGIC, + AvroSchema, CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, SINGLE_OBJECT_MAGIC, Schema, + SchemaStore, }; use arrow_array::{RecordBatch, RecordBatchReader}; use arrow_schema::{ArrowError, SchemaRef}; @@ -695,7 +695,7 @@ impl Decoder { None => { return Err(ArrowError::ParseError( "Missing magic bytes and fingerprint".to_string(), - )) + )); } } } @@ -1267,9 +1267,9 @@ mod test { use crate::reader::record::RecordDecoder; use crate::reader::{Decoder, Reader, ReaderBuilder}; use crate::schema::{ - AvroSchema, Fingerprint, FingerprintAlgorithm, PrimitiveType, SchemaStore, - AVRO_ENUM_SYMBOLS_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY, AVRO_NAME_METADATA_KEY, - CONFLUENT_MAGIC, SINGLE_OBJECT_MAGIC, + AVRO_ENUM_SYMBOLS_METADATA_KEY, AVRO_NAME_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY, + AvroSchema, CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, PrimitiveType, + SINGLE_OBJECT_MAGIC, SchemaStore, }; use crate::test_util::arrow_test_data; use crate::writer::AvroWriter; @@ -1289,7 +1289,7 @@ mod test { use arrow_array::types::{Int32Type, IntervalMonthDayNanoType}; use arrow_array::*; use arrow_buffer::{ - i256, Buffer, IntervalMonthDayNano, NullBuffer, OffsetBuffer, ScalarBuffer, + Buffer, IntervalMonthDayNano, NullBuffer, OffsetBuffer, ScalarBuffer, i256, }; #[cfg(feature = "avro_custom_types")] use arrow_schema::{ @@ -1302,8 +1302,8 @@ mod test { }; use bytes::Bytes; use futures::executor::block_on; - use futures::{stream, Stream, StreamExt, TryStreamExt}; - use serde_json::{json, Value}; + use futures::{Stream, StreamExt, TryStreamExt, stream}; + use serde_json::{Value, json}; use std::collections::HashMap; use std::fs::File; use std::io::{BufReader, Cursor}; @@ -2810,7 +2810,7 @@ mod test { let top_meta = proj_field.metadata().clone(); let (expected_field_ref, expected_col): (Arc, ArrayRef) = match (full_field.data_type(), proj_field.data_type()) { - (&DataType::List(_), DataType::List(ref proj_elem)) => { + (&DataType::List(_), DataType::List(proj_elem)) => { let new_col = rebuild_list_array_with_element(&col_full, proj_elem.clone(), false); let nf = Field::new( @@ -2821,7 +2821,7 @@ mod test { .with_metadata(top_meta); (Arc::new(nf), new_col) } - (&DataType::LargeList(_), DataType::LargeList(ref proj_elem)) => { + (&DataType::LargeList(_), DataType::LargeList(proj_elem)) => { let new_col = rebuild_list_array_with_element(&col_full, proj_elem.clone(), true); let nf = Field::new( diff --git a/arrow-avro/src/reader/record.rs b/arrow-avro/src/reader/record.rs index f2376e5ade62..99e782b0fd93 100644 --- a/arrow-avro/src/reader/record.rs +++ b/arrow-avro/src/reader/record.rs @@ -21,15 +21,15 @@ use crate::codec::{ }; use crate::reader::cursor::AvroCursor; use crate::schema::Nullability; -use arrow_array::builder::{Decimal128Builder, Decimal256Builder, IntervalMonthDayNanoBuilder}; #[cfg(feature = "small_decimals")] use arrow_array::builder::{Decimal32Builder, Decimal64Builder}; +use arrow_array::builder::{Decimal128Builder, Decimal256Builder, IntervalMonthDayNanoBuilder}; use arrow_array::types::*; use arrow_array::*; use arrow_buffer::*; use arrow_schema::{ - ArrowError, DataType, Field as ArrowField, FieldRef, Fields, Schema as ArrowSchema, SchemaRef, - UnionFields, UnionMode, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, + ArrowError, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DataType, Field as ArrowField, + FieldRef, Fields, Schema as ArrowSchema, SchemaRef, UnionFields, UnionMode, }; #[cfg(feature = "small_decimals")] use arrow_schema::{DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION}; @@ -4047,8 +4047,8 @@ mod tests { } #[test] - fn test_record_append_default_missing_fields_without_projector_defaults_yields_type_nulls_or_empties( - ) { + fn test_record_append_default_missing_fields_without_projector_defaults_yields_type_nulls_or_empties() + { let fields = vec![("a", DataType::Int32, true), ("b", DataType::Utf8, true)]; let mut field_refs: Vec = Vec::new(); let mut encoders: Vec = Vec::new(); diff --git a/arrow-avro/src/schema.rs b/arrow-avro/src/schema.rs index 6b985a2fc6c2..b3081bbd09ab 100644 --- a/arrow-avro/src/schema.rs +++ b/arrow-avro/src/schema.rs @@ -22,7 +22,7 @@ use arrow_schema::{ UnionMode, }; use serde::{Deserialize, Serialize}; -use serde_json::{json, Map as JsonMap, Value}; +use serde_json::{Map as JsonMap, Value, json}; #[cfg(feature = "sha256")] use sha2::{Digest, Sha256}; use std::borrow::Cow; @@ -1432,7 +1432,7 @@ fn datatype_to_avro( _ => { return Err(ArrowError::SchemaError( "Map 'entries' field must be Struct(key,value)".into(), - )) + )); } }; let values_schema = process_datatype( @@ -1556,7 +1556,7 @@ fn datatype_to_avro( other => { return Err(ArrowError::NotYetImplemented(format!( "Arrow type {other:?} has no Avro representation" - ))) + ))); } }; Ok((val, extras)) @@ -2148,9 +2148,11 @@ mod tests { store.lookup(&Fingerprint::Rabin(fp_val)).cloned(), Some(schema.clone()) ); - assert!(store - .lookup(&Fingerprint::Rabin(fp_val.wrapping_add(1))) - .is_none()); + assert!( + store + .lookup(&Fingerprint::Rabin(fp_val.wrapping_add(1))) + .is_none() + ); } Fingerprint::Id(_id) => { unreachable!("This test should only generate Rabin fingerprints") diff --git a/arrow-avro/src/writer/encoder.rs b/arrow-avro/src/writer/encoder.rs index 363fee6f22b9..0fa217e89488 100644 --- a/arrow-avro/src/writer/encoder.rs +++ b/arrow-avro/src/writer/encoder.rs @@ -241,9 +241,9 @@ impl<'a> FieldEncoder<'a> { DataType::Time32(TimeUnit::Millisecond) => { Encoder::Time32Millis(IntEncoder(array.as_primitive::())) } - DataType::Time64(TimeUnit::Microsecond) => { - Encoder::Time64Micros(LongEncoder(array.as_primitive::())) - } + DataType::Time64(TimeUnit::Microsecond) => Encoder::Time64Micros(LongEncoder( + array.as_primitive::(), + )), DataType::Float32 => { Encoder::Float32(F32Encoder(array.as_primitive::())) } @@ -277,36 +277,30 @@ impl<'a> FieldEncoder<'a> { } }, DataType::Interval(unit) => match unit { - IntervalUnit::MonthDayNano => { - Encoder::IntervalMonthDayNano(DurationEncoder( - array.as_primitive::(), - )) - } - IntervalUnit::YearMonth => { - Encoder::IntervalYearMonth(DurationEncoder( - array.as_primitive::(), - )) - } + IntervalUnit::MonthDayNano => Encoder::IntervalMonthDayNano(DurationEncoder( + array.as_primitive::(), + )), + IntervalUnit::YearMonth => Encoder::IntervalYearMonth(DurationEncoder( + array.as_primitive::(), + )), IntervalUnit::DayTime => Encoder::IntervalDayTime(DurationEncoder( array.as_primitive::(), )), - } - DataType::Duration(tu) => { - match tu { - TimeUnit::Second => Encoder::DurationSeconds(LongEncoder( - array.as_primitive::(), - )), - TimeUnit::Millisecond => Encoder::DurationMillis(LongEncoder( - array.as_primitive::(), - )), - TimeUnit::Microsecond => Encoder::DurationMicros(LongEncoder( - array.as_primitive::(), - )), - TimeUnit::Nanosecond => Encoder::DurationNanos(LongEncoder( - array.as_primitive::(), - )), - } - } + }, + DataType::Duration(tu) => match tu { + TimeUnit::Second => Encoder::DurationSeconds(LongEncoder( + array.as_primitive::(), + )), + TimeUnit::Millisecond => Encoder::DurationMillis(LongEncoder( + array.as_primitive::(), + )), + TimeUnit::Microsecond => Encoder::DurationMicros(LongEncoder( + array.as_primitive::(), + )), + TimeUnit::Nanosecond => Encoder::DurationNanos(LongEncoder( + array.as_primitive::(), + )), + }, other => { return Err(ArrowError::NotYetImplemented(format!( "Avro scalar type not yet supported: {other:?}" @@ -349,12 +343,12 @@ impl<'a> FieldEncoder<'a> { other => { return Err(ArrowError::SchemaError(format!( "Avro array site requires Arrow List/LargeList, found: {other:?}" - ))) + ))); } }, - FieldPlan::Decimal {size} => match array.data_type() { + FieldPlan::Decimal { size } => match array.data_type() { #[cfg(feature = "small_decimals")] - DataType::Decimal32(_,_) => { + DataType::Decimal32(_, _) => { let arr = array .as_any() .downcast_ref::() @@ -362,49 +356,61 @@ impl<'a> FieldEncoder<'a> { Encoder::Decimal32(DecimalEncoder::<4, Decimal32Array>::new(arr, *size)) } #[cfg(feature = "small_decimals")] - DataType::Decimal64(_,_) => { + DataType::Decimal64(_, _) => { let arr = array .as_any() .downcast_ref::() .ok_or_else(|| ArrowError::SchemaError("Expected Decimal64Array".into()))?; Encoder::Decimal64(DecimalEncoder::<8, Decimal64Array>::new(arr, *size)) } - DataType::Decimal128(_,_) => { + DataType::Decimal128(_, _) => { let arr = array .as_any() .downcast_ref::() - .ok_or_else(|| ArrowError::SchemaError("Expected Decimal128Array".into()))?; + .ok_or_else(|| { + ArrowError::SchemaError("Expected Decimal128Array".into()) + })?; Encoder::Decimal128(DecimalEncoder::<16, Decimal128Array>::new(arr, *size)) } - DataType::Decimal256(_,_) => { + DataType::Decimal256(_, _) => { let arr = array .as_any() .downcast_ref::() - .ok_or_else(|| ArrowError::SchemaError("Expected Decimal256Array".into()))?; + .ok_or_else(|| { + ArrowError::SchemaError("Expected Decimal256Array".into()) + })?; Encoder::Decimal256(DecimalEncoder::<32, Decimal256Array>::new(arr, *size)) } other => { return Err(ArrowError::SchemaError(format!( "Avro decimal site requires Arrow Decimal 32, 64, 128, or 256, found: {other:?}" - ))) + ))); } }, FieldPlan::Uuid => { let arr = array .as_any() .downcast_ref::() - .ok_or_else(|| ArrowError::SchemaError("Expected FixedSizeBinaryArray".into()))?; + .ok_or_else(|| { + ArrowError::SchemaError("Expected FixedSizeBinaryArray".into()) + })?; Encoder::Uuid(UuidEncoder(arr)) } - FieldPlan::Map { values_nullability, - value_plan } => { + FieldPlan::Map { + values_nullability, + value_plan, + } => { let arr = array .as_any() .downcast_ref::() .ok_or_else(|| ArrowError::SchemaError("Expected MapArray".into()))?; - Encoder::Map(Box::new(MapEncoder::try_new(arr, *values_nullability, value_plan.as_ref())?)) + Encoder::Map(Box::new(MapEncoder::try_new( + arr, + *values_nullability, + value_plan.as_ref(), + )?)) } - FieldPlan::Enum { symbols} => match array.data_type() { + FieldPlan::Enum { symbols } => match array.data_type() { DataType::Dictionary(key_dt, value_dt) => { if **key_dt != DataType::Int32 || **value_dt != DataType::Utf8 { return Err(ArrowError::SchemaError( @@ -447,9 +453,9 @@ impl<'a> FieldEncoder<'a> { other => { return Err(ArrowError::SchemaError(format!( "Avro enum site requires DataType::Dictionary, found: {other:?}" - ))) + ))); } - } + }, FieldPlan::Union { bindings } => { let arr = array .as_any() @@ -509,11 +515,7 @@ impl<'a> FieldEncoder<'a> { fn union_value_branch_byte(null_order: Nullability, is_null: bool) -> u8 { let nulls_first = null_order == Nullability::default(); - if nulls_first == is_null { - 0x00 - } else { - 0x02 - } + if nulls_first == is_null { 0x00 } else { 0x02 } } /// Per‑site encoder plan for a field. This mirrors the Avro structure, so nested @@ -728,7 +730,7 @@ impl FieldPlan { other => { return Err(ArrowError::SchemaError(format!( "Avro struct maps to Arrow Struct, found: {other:?}" - ))) + ))); } }; let mut bindings = Vec::with_capacity(avro_fields.len()); @@ -767,7 +769,7 @@ impl FieldPlan { other => { return Err(ArrowError::SchemaError(format!( "Avro map maps to Arrow DataType::Map, found: {other:?}" - ))) + ))); } }; let entries_struct_fields = match entries_field.data_type() { @@ -775,7 +777,7 @@ impl FieldPlan { other => { return Err(ArrowError::SchemaError(format!( "Arrow Map entries must be Struct, found: {other:?}" - ))) + ))); } }; let value_idx = @@ -822,7 +824,7 @@ impl FieldPlan { return Err(ArrowError::SchemaError(format!( "Avro decimal requires Arrow decimal, got {other:?} for field '{}'", arrow_field.name() - ))) + ))); } }; let sc = scale_opt.unwrap_or(0) as i32; // Avro scale defaults to 0 if absent @@ -837,12 +839,13 @@ impl FieldPlan { }) } Codec::Interval => match arrow_field.data_type() { - DataType::Interval(IntervalUnit::MonthDayNano | IntervalUnit::YearMonth | IntervalUnit::DayTime + DataType::Interval( + IntervalUnit::MonthDayNano | IntervalUnit::YearMonth | IntervalUnit::DayTime, ) => Ok(FieldPlan::Scalar), other => Err(ArrowError::SchemaError(format!( "Avro duration logical type requires Arrow Interval(MonthDayNano), found: {other:?}" ))), - } + }, Codec::Union(avro_branches, _, UnionMode::Dense) => { let arrow_union_fields = match arrow_field.data_type() { DataType::Union(fields, UnionMode::Dense) => fields, @@ -882,11 +885,9 @@ impl FieldPlan { Ok(FieldPlan::Union { bindings }) } - Codec::Union(_, _, UnionMode::Sparse) => { - Err(ArrowError::NotYetImplemented( - "Sparse Arrow unions are not yet supported".to_string(), - )) - } + Codec::Union(_, _, UnionMode::Sparse) => Err(ArrowError::NotYetImplemented( + "Sparse Arrow unions are not yet supported".to_string(), + )), _ => Ok(FieldPlan::Scalar), } } @@ -1069,7 +1070,7 @@ impl<'a> MapEncoder<'a> { other => { return Err(ArrowError::SchemaError(format!( "Avro map requires string keys; Arrow key type must be Utf8/LargeUtf8, found: {other:?}" - ))) + ))); } }; @@ -1079,13 +1080,13 @@ impl<'a> MapEncoder<'a> { other => { return Err(ArrowError::SchemaError(format!( "Arrow Map entries must be Struct, found: {other:?}" - ))) + ))); } }, _ => { return Err(ArrowError::SchemaError( "Expected MapArray with DataType::Map".into(), - )) + )); } }; @@ -1313,7 +1314,7 @@ impl<'a, O: OffsetSizeTrait> ListEncoder<'a, O> { _ => { return Err(ArrowError::SchemaError( "Expected List or LargeList for ListEncoder".into(), - )) + )); } }; let values_enc = prepare_value_site_encoder( diff --git a/arrow-avro/src/writer/format.rs b/arrow-avro/src/writer/format.rs index 90a2856330b9..f885fb86a687 100644 --- a/arrow-avro/src/writer/format.rs +++ b/arrow-avro/src/writer/format.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::compression::{CompressionCodec, CODEC_METADATA_KEY}; +use crate::compression::{CODEC_METADATA_KEY, CompressionCodec}; use crate::schema::{AvroSchema, SCHEMA_METADATA_KEY}; use crate::writer::encoder::write_long; use arrow_schema::{ArrowError, Schema}; diff --git a/arrow-avro/src/writer/mod.rs b/arrow-avro/src/writer/mod.rs index 8f7cb666dbae..0b87d6f13f07 100644 --- a/arrow-avro/src/writer/mod.rs +++ b/arrow-avro/src/writer/mod.rs @@ -62,7 +62,7 @@ use crate::compression::CompressionCodec; use crate::schema::{ AvroSchema, Fingerprint, FingerprintAlgorithm, FingerprintStrategy, SCHEMA_METADATA_KEY, }; -use crate::writer::encoder::{write_long, RecordEncoder, RecordEncoderBuilder}; +use crate::writer::encoder::{RecordEncoder, RecordEncoderBuilder, write_long}; use crate::writer::format::{AvroBinaryFormat, AvroFormat, AvroOcfFormat}; use arrow_array::RecordBatch; use arrow_schema::{ArrowError, Schema};