From 11c99a3232761e6d12162f6c09822de821b61c96 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 06:51:05 -0400
Subject: [PATCH 01/15] Document Arrow <--> Parquet schema conversion better

---
 parquet/src/arrow/arrow_reader/mod.rs | 19 +++++++-----
 parquet/src/arrow/mod.rs              | 44 +++++++++++++++++++++------
 parquet/src/arrow/schema/mod.rs       |  3 ++
 3 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 8bbe175dafb8..1fad2bc40fe8 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -314,14 +314,19 @@ impl ArrowReaderOptions {
         }
     }
 
-    /// Provide a schema to use when reading the parquet file. If provided it
-    /// takes precedence over the schema inferred from the file or the schema defined
-    /// in the file's metadata. If the schema is not compatible with the file's
-    /// schema an error will be returned when constructing the builder.
+    /// Provide a schema to use when reading the Parquet file.
     ///
-    /// This option is only required if you want to cast columns to a different type.
-    /// For example, if you wanted to cast from an Int64 in the Parquet file to a Timestamp
-    /// in the Arrow schema.
+    /// If provided, this schema takes precedence over the schema inferred from
+    /// the file or the schema defined  in the file's metadata (see [`arrow`]
+    /// documentation for more details). If the provided schema is not compatible
+    /// with the file's schema, an error will be returned when constructing the builder.
+    ///
+    /// This option is only required if you want to explicitly control the
+    /// conversion of Parquet types to Arrow types, such as casting a column to
+    /// a different type. For example, if you wanted to read an Int64 in
+    /// a Parquet file to a [`TimestampMicrosecondArray`] in the Arrow schema.
+    ///
+    /// # Notes
     ///
     /// The supplied schema must have the same number of columns as the parquet schema and
     /// the column names need to be the same.
diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index b89c6ddcf8da..182e77038ea8 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -15,13 +15,41 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! API for reading/writing
-//! Arrow [RecordBatch](arrow_array::RecordBatch)es and
-//! [Array](arrow_array::Array)s to/from Parquet Files.
+//! API for reading/writing Arrow [`RecordBatch`]es and [`Array`]s to/from
+//! Parquet Files.
 //!
-//! See the [crate-level documentation](crate) for more details.
+//! See the [crate-level documentation](crate) for more details on other APIs.
 //!
-//! # Example of writing Arrow record batch to Parquet file
+//! # Schema Conversion
+//!
+//! These APIs ensure that data in Arrow [`RecordBatch`]es written to Parquet are
+//! read back as [`RecordBatch`]es with the exact same types and values.
+//!
+//! Parquet and Arrow have different type systems, and there is not
+//! always a one to one mapping between the systems. For example, data
+//! stored as a Parquet [`BYTE_ARRAY`] can be read as either an Arrow
+//! [`BinaryViewArray`] or [`BinaryArray`].
+//!
+//! To recover the original Arrow types, the writers in this module add
+//! metadata in the [`ARROW_SCHEMA_META_KEY`] key to record the original Arrow
+//! schema. The readers look for this metadata to determine Arrow types, and if
+//! it is not present, use reasonable defaults. You can also control the type
+//! conversion process in more detail using:
+//!
+//! * [`ArrowSchemaConverter`] control the conversion of Arrow types to Parquet
+//!   types.
+//!
+//! * [`ArrowReaderOptions::with_schema`] to explicitly specify what Arrow types
+//!   to use when reading Parquet, overriding any metadata that may be present.
+//!
+//! [`RecordBatch`]: arrow_array::RecordBatch
+//! [`Array`]: arrow_array::Array
+//! [`BYTE_ARRAY`]: crate::basic::Type::BYTE_ARRAY
+//! [`BinaryViewArray`]: arrow_array::BinaryViewArray
+//! [`BinaryArray`]: arrow_array::BinaryArray
+//! [`ArrowReaderOptions::with_schema`]: arrow_reader::ArrowReaderOptions::with_schema
+//!
+//! # Example: Writing Arrow `RecordBatch` to Parquet file
 //!
 //!```rust
 //! # use arrow_array::{Int32Array, ArrayRef};
@@ -53,7 +81,7 @@
 //! writer.close().unwrap();
 //! ```
 //!
-//! # Example of reading parquet file into arrow record batch
+//! # Example: Reading Parquet file into Arrow `RecordBatch`
 //!
 //! ```rust
 //! # use std::fs::File;
@@ -93,11 +121,10 @@
 //! println!("Read {} records.", record_batch.num_rows());
 //! ```
 //!
-//! # Example of reading non-uniformly encrypted parquet file into arrow record batch
+//! # Example: Reading non-uniformly encrypted Parquet file into Arrow `RecordBatch`
 //!
 //! Note: This requires the experimental `encryption` feature to be enabled at compile time.
 //!
-//!
 #![cfg_attr(feature = "encryption", doc = "```rust")]
 #![cfg_attr(not(feature = "encryption"), doc = "```ignore")]
 //! # use arrow_array::{Int32Array, ArrayRef};
@@ -168,7 +195,6 @@ pub use self::async_reader::ParquetRecordBatchStreamBuilder;
 pub use self::async_writer::AsyncArrowWriter;
 use crate::schema::types::{SchemaDescriptor, Type};
 use arrow_schema::{FieldRef, Schema};
-
 // continue to export deprecated methods until they are removed
 #[allow(deprecated)]
 pub use self::schema::arrow_to_parquet_schema;
diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index 89c42f5eaf92..fa6d23314c44 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -223,6 +223,9 @@ pub fn add_encoded_arrow_schema_to_metadata(schema: &Schema, props: &mut WriterP
 }
 
 /// Converter for Arrow schema to Parquet schema
+/// 
+/// See the documentation on the [`arrow`] module for background 
+/// information on how Arrow schema is represented in Parquet.
 ///
 /// Example:
 /// ```

From 2949e780c52b287a6d930c039f7828fef33b11eb Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 11:58:27 -0400
Subject: [PATCH 02/15] Add a note about arrow metadata convention

---
 parquet/src/arrow/mod.rs | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index 182e77038ea8..fccf66901d13 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -32,9 +32,10 @@
 //!
 //! To recover the original Arrow types, the writers in this module add
 //! metadata in the [`ARROW_SCHEMA_META_KEY`] key to record the original Arrow
-//! schema. The readers look for this metadata to determine Arrow types, and if
-//! it is not present, use reasonable defaults. You can also control the type
-//! conversion process in more detail using:
+//! schema. The metadata follows the same convention as arrow-cpp based 
+//! implementations such as `pyarrow`. The reader looks for this metadata to 
+//! determine Arrow types, and if it is not present, use reasonable defaults. 
+//! You can also control the type conversion process in more detail using:
 //!
 //! * [`ArrowSchemaConverter`] control the conversion of Arrow types to Parquet
 //!   types.
@@ -204,7 +205,10 @@ pub use self::schema::{
     parquet_to_arrow_schema, parquet_to_arrow_schema_by_columns, ArrowSchemaConverter, FieldLevels,
 };
 
-/// Schema metadata key used to store serialized Arrow IPC schema
+/// Schema metadata key used to store serialized Arrow schema
+/// 
+/// The Arrow schema is encoded using the Arrow IPC format, and then base64
+/// encoded. This is the same format used by arrow-cpp systems, such as pyarrow.
 pub const ARROW_SCHEMA_META_KEY: &str = "ARROW:schema";
 
 /// The value of this metadata key, if present on [`Field::metadata`], will be used

From ca7b0c31b3227a2afc9658e3555c520579dedc03 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 11:58:46 -0400
Subject: [PATCH 03/15] lint

---
 parquet/src/arrow/mod.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index fccf66901d13..dc70b60e06ce 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -32,9 +32,9 @@
 //!
 //! To recover the original Arrow types, the writers in this module add
 //! metadata in the [`ARROW_SCHEMA_META_KEY`] key to record the original Arrow
-//! schema. The metadata follows the same convention as arrow-cpp based 
-//! implementations such as `pyarrow`. The reader looks for this metadata to 
-//! determine Arrow types, and if it is not present, use reasonable defaults. 
+//! schema. The metadata follows the same convention as arrow-cpp based
+//! implementations such as `pyarrow`. The reader looks for this metadata to
+//! determine Arrow types, and if it is not present, use reasonable defaults.
 //! You can also control the type conversion process in more detail using:
 //!
 //! * [`ArrowSchemaConverter`] control the conversion of Arrow types to Parquet
@@ -206,7 +206,7 @@ pub use self::schema::{
 };
 
 /// Schema metadata key used to store serialized Arrow schema
-/// 
+///
 /// The Arrow schema is encoded using the Arrow IPC format, and then base64
 /// encoded. This is the same format used by arrow-cpp systems, such as pyarrow.
 pub const ARROW_SCHEMA_META_KEY: &str = "ARROW:schema";

From 893718d042f840b7af88df02fa53b0b0332f0538 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 12:00:46 -0400
Subject: [PATCH 04/15] Fix links

---
 parquet/src/arrow/arrow_reader/mod.rs | 5 ++++-
 parquet/src/arrow/schema/mod.rs       | 4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 1fad2bc40fe8..62052bc9918d 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -317,7 +317,7 @@ impl ArrowReaderOptions {
     /// Provide a schema to use when reading the Parquet file.
     ///
     /// If provided, this schema takes precedence over the schema inferred from
-    /// the file or the schema defined  in the file's metadata (see [`arrow`]
+    /// the file or the schema defined  in the file's metadata (see the [`arrow`]
     /// documentation for more details). If the provided schema is not compatible
     /// with the file's schema, an error will be returned when constructing the builder.
     ///
@@ -326,6 +326,9 @@ impl ArrowReaderOptions {
     /// a different type. For example, if you wanted to read an Int64 in
     /// a Parquet file to a [`TimestampMicrosecondArray`] in the Arrow schema.
     ///
+    /// [`arrow`]: crate::arrow
+    /// [`TimestampMicrosecondArray`]: arrow_array::TimestampMicrosecondArray
+    ///
     /// # Notes
     ///
     /// The supplied schema must have the same number of columns as the parquet schema and
diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index fa6d23314c44..9fd34c4911cc 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -227,7 +227,9 @@ pub fn add_encoded_arrow_schema_to_metadata(schema: &Schema, props: &mut WriterP
 /// See the documentation on the [`arrow`] module for background 
 /// information on how Arrow schema is represented in Parquet.
 ///
-/// Example:
+/// [`arrow`]: crate::arrow
+///
+/// # Example:
 /// ```
 /// # use std::sync::Arc;
 /// # use arrow_schema::{Field, Schema, DataType};

From 3a4e03da3a6e6375855fd1862672f5c6f9ede495 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 12:23:34 -0400
Subject: [PATCH 05/15] clarify what happens with provided schema

---
 parquet/src/arrow/arrow_reader/mod.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 62052bc9918d..855d98ea7443 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -316,10 +316,11 @@ impl ArrowReaderOptions {
 
     /// Provide a schema to use when reading the Parquet file.
     ///
-    /// If provided, this schema takes precedence over the schema inferred from
-    /// the file or the schema defined  in the file's metadata (see the [`arrow`]
-    /// documentation for more details). If the provided schema is not compatible
-    /// with the file's schema, an error will be returned when constructing the builder.
+    /// If provided, this schema takes precedence over the schema defined in the 
+    /// arrow file's metadata (see the [`arrow`] documentation for more details). 
+    /// If the provided schema is not compatible with the data stored in the 
+    /// parquet file schema, an error will be returned when constructing the 
+    /// builder.
     ///
     /// This option is only required if you want to explicitly control the
     /// conversion of Parquet types to Arrow types, such as casting a column to

From 8b0920c85fd738165d0b8c7544a2ffdf9b83d84a Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 12:28:33 -0400
Subject: [PATCH 06/15] More docs

---
 parquet/src/arrow/arrow_reader/mod.rs | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 855d98ea7443..965600a553b8 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -286,7 +286,10 @@ impl<T> ArrowReaderBuilder<T> {
 pub struct ArrowReaderOptions {
     /// Should the reader strip any user defined metadata from the Arrow schema
     skip_arrow_metadata: bool,
-    /// If provided used as the schema for the file, otherwise the schema is read from the file
+    /// If provided, used as the schema hint when determining the Arrow schema,
+    /// otherwise the schema hint is read from the [ARROW_SCHEMA_META_KEY] metadata
+    ///
+    /// [ARROW_SCHEMA_META_KEY]: crate::arrow::ARROW_SCHEMA_META_KEY
     supplied_schema: Option<SchemaRef>,
     /// If true, attempt to read `OffsetIndex` and `ColumnIndex`
     pub(crate) page_index: bool,
@@ -316,10 +319,10 @@ impl ArrowReaderOptions {
 
     /// Provide a schema to use when reading the Parquet file.
     ///
-    /// If provided, this schema takes precedence over the schema defined in the 
-    /// arrow file's metadata (see the [`arrow`] documentation for more details). 
-    /// If the provided schema is not compatible with the data stored in the 
-    /// parquet file schema, an error will be returned when constructing the 
+    /// If provided, this schema takes precedence over the schema defined in the
+    /// arrow file's metadata (see the [`arrow`] documentation for more details).
+    /// If the provided schema is not compatible with the data stored in the
+    /// parquet file schema, an error will be returned when constructing the
     /// builder.
     ///
     /// This option is only required if you want to explicitly control the

From e417988252d9167efa32113e7bdcb07bd42f246c Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 12:38:40 -0400
Subject: [PATCH 07/15] fmt

---
 parquet/src/arrow/schema/mod.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index 9fd34c4911cc..f3ecac844a3e 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -223,8 +223,8 @@ pub fn add_encoded_arrow_schema_to_metadata(schema: &Schema, props: &mut WriterP
 }
 
 /// Converter for Arrow schema to Parquet schema
-/// 
-/// See the documentation on the [`arrow`] module for background 
+///
+/// See the documentation on the [`arrow`] module for background
 /// information on how Arrow schema is represented in Parquet.
 ///
 /// [`arrow`]: crate::arrow

From 812160005efe3afc63531b8ea051e1fa44a91f67 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 12:56:08 -0400
Subject: [PATCH 08/15] more clarification

---
 parquet/src/arrow/arrow_reader/mod.rs |  8 ++++----
 parquet/src/arrow/mod.rs              | 12 ++++++------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 965600a553b8..3c1a71f84525 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -319,8 +319,8 @@ impl ArrowReaderOptions {
 
     /// Provide a schema to use when reading the Parquet file.
     ///
-    /// If provided, this schema takes precedence over the schema defined in the
-    /// arrow file's metadata (see the [`arrow`] documentation for more details).
+    /// If provided, this schema takes precedence over any schema defined in the
+    /// file's schema hint in the metadata (see the [`arrow`] documentation for more details).
     /// If the provided schema is not compatible with the data stored in the
     /// parquet file schema, an error will be returned when constructing the
     /// builder.
@@ -335,8 +335,8 @@ impl ArrowReaderOptions {
     ///
     /// # Notes
     ///
-    /// The supplied schema must have the same number of columns as the parquet schema and
-    /// the column names need to be the same.
+    /// The provided schema must have the same number of columns as the parquet schema and
+    /// the column names must be the same.
     ///
     /// # Example
     /// ```
diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index dc70b60e06ce..6a5ed8da0a17 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -30,17 +30,17 @@
 //! stored as a Parquet [`BYTE_ARRAY`] can be read as either an Arrow
 //! [`BinaryViewArray`] or [`BinaryArray`].
 //!
-//! To recover the original Arrow types, the writers in this module add
-//! metadata in the [`ARROW_SCHEMA_META_KEY`] key to record the original Arrow
-//! schema. The metadata follows the same convention as arrow-cpp based
-//! implementations such as `pyarrow`. The reader looks for this metadata to
-//! determine Arrow types, and if it is not present, use reasonable defaults.
+//! To recover the original Arrow types, the writers in this module add a "hint" to
+//! the metadata in the [`ARROW_SCHEMA_META_KEY`] key which records the original Arrow
+//! schema. The metadata hint follows the same convention as arrow-cpp based
+//! implementations such as `pyarrow`. The reader looks for the schema hint in the
+//! metadata to determine Arrow types, and if it is not present, use reasonable defaults.
 //! You can also control the type conversion process in more detail using:
 //!
 //! * [`ArrowSchemaConverter`] control the conversion of Arrow types to Parquet
 //!   types.
 //!
-//! * [`ArrowReaderOptions::with_schema`] to explicitly specify what Arrow types
+//! * [`ArrowReaderOptions::with_schema`] to explicitly specify your own Arrow schema hint
 //!   to use when reading Parquet, overriding any metadata that may be present.
 //!
 //! [`RecordBatch`]: arrow_array::RecordBatch

From 2defff51190cb01de50be316c54ee70e147f6308 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 13:01:31 -0400
Subject: [PATCH 09/15] More clarifications

---
 parquet/src/arrow/schema/mod.rs | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index f3ecac844a3e..2ee9ff9c63fb 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -104,7 +104,13 @@ pub struct FieldLevels {
 /// Convert a parquet [`SchemaDescriptor`] to [`FieldLevels`]
 ///
 /// Columns not included within [`ProjectionMask`] will be ignored.
-///
+/// 
+/// The optional `hint` parameter is the desired Arrow schema. See the
+/// [`arrow`] module documentation for more information.
+/// 
+/// [`arrow`]: crate::arrow
+/// 
+/// # Notes:
 /// Where a field type in `hint` is compatible with the corresponding parquet type in `schema`, it
 /// will be used, otherwise the default arrow type for the given parquet column type will be used.
 ///
@@ -192,8 +198,12 @@ pub fn encode_arrow_schema(schema: &Schema) -> String {
     BASE64_STANDARD.encode(&len_prefix_schema)
 }
 
-/// Mutates writer metadata by storing the encoded Arrow schema.
+/// Mutates writer metadata by storing the encoded Arrow schema hint in
+/// [`ARROW_SCHEMA_META_KEY`].
+/// 
 /// If there is an existing Arrow schema metadata, it is replaced.
+/// 
+/// [`ARROW_SCHEMA_META_KEY`]: crate::arrow::ARROW_SCHEMA_META_KEY
 pub fn add_encoded_arrow_schema_to_metadata(schema: &Schema, props: &mut WriterProperties) {
     let encoded = encode_arrow_schema(schema);
 

From 3426c8aeff9d160bd78b34a0a1052a97016065fe Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 13:31:56 -0400
Subject: [PATCH 10/15] fmt

---
 parquet/src/arrow/schema/mod.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index 2ee9ff9c63fb..ad9e1f781f70 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -104,12 +104,12 @@ pub struct FieldLevels {
 /// Convert a parquet [`SchemaDescriptor`] to [`FieldLevels`]
 ///
 /// Columns not included within [`ProjectionMask`] will be ignored.
-/// 
+///
 /// The optional `hint` parameter is the desired Arrow schema. See the
 /// [`arrow`] module documentation for more information.
-/// 
+///
 /// [`arrow`]: crate::arrow
-/// 
+///
 /// # Notes:
 /// Where a field type in `hint` is compatible with the corresponding parquet type in `schema`, it
 /// will be used, otherwise the default arrow type for the given parquet column type will be used.
@@ -200,9 +200,9 @@ pub fn encode_arrow_schema(schema: &Schema) -> String {
 
 /// Mutates writer metadata by storing the encoded Arrow schema hint in
 /// [`ARROW_SCHEMA_META_KEY`].
-/// 
+///
 /// If there is an existing Arrow schema metadata, it is replaced.
-/// 
+///
 /// [`ARROW_SCHEMA_META_KEY`]: crate::arrow::ARROW_SCHEMA_META_KEY
 pub fn add_encoded_arrow_schema_to_metadata(schema: &Schema, props: &mut WriterProperties) {
     let encoded = encode_arrow_schema(schema);

From eace423bb04bd65383424c40a1d4656c4faaeac5 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 14:12:18 -0400
Subject: [PATCH 11/15] Update parquet/src/arrow/arrow_reader/mod.rs

Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
---
 parquet/src/arrow/arrow_reader/mod.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 3c1a71f84525..2f670a64e108 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -317,10 +317,11 @@ impl ArrowReaderOptions {
         }
     }
 
-    /// Provide a schema to use when reading the Parquet file.
+    /// Provide a schema hint to use when reading the Parquet file.
+    ///
+    /// If provided, this schema takes precedence over any arrow schema embedded
+    /// in the metadata (see the [`arrow`] documentation for more details).
     ///
-    /// If provided, this schema takes precedence over any schema defined in the
-    /// file's schema hint in the metadata (see the [`arrow`] documentation for more details).
     /// If the provided schema is not compatible with the data stored in the
     /// parquet file schema, an error will be returned when constructing the
     /// builder.

From 3beb7a6c3b5fc35cef7a523b193ce137336b675f Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 14:37:13 -0400
Subject: [PATCH 12/15] Update parquet/src/arrow/mod.rs

Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
---
 parquet/src/arrow/mod.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index 6a5ed8da0a17..9cb7c6148751 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -34,7 +34,12 @@
 //! the metadata in the [`ARROW_SCHEMA_META_KEY`] key which records the original Arrow
 //! schema. The metadata hint follows the same convention as arrow-cpp based
 //! implementations such as `pyarrow`. The reader looks for the schema hint in the
-//! metadata to determine Arrow types, and if it is not present, use reasonable defaults.
+//! metadata to determine Arrow types, and if it is not present, infers the arrow schema
+//! from the parquet schema.
+//!
+//! In situations where the embedded arrow schema is not compatible with the parquet
+//! schema, the parquet schema takes precedence - see [#1663](https://github.com/apache/arrow-rs/issues/1663)
+//!
 //! You can also control the type conversion process in more detail using:
 //!
 //! * [`ArrowSchemaConverter`] control the conversion of Arrow types to Parquet

From 6f803b174e5bd470d777dd4b3393fb7339d53da2 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 14:39:25 -0400
Subject: [PATCH 13/15] tweaks

---
 parquet/src/arrow/mod.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index 9cb7c6148751..e8e9f6f186e1 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -37,8 +37,9 @@
 //! metadata to determine Arrow types, and if it is not present, infers the arrow schema
 //! from the parquet schema.
 //!
-//! In situations where the embedded arrow schema is not compatible with the parquet
-//! schema, the parquet schema takes precedence - see [#1663](https://github.com/apache/arrow-rs/issues/1663)
+//! In situations where the embedded Arrow schema is not compatible with the parquet
+//! schema, the parquet schema takes precedence and no error is raised.
+//! See [#1663](https://github.com/apache/arrow-rs/issues/1663)
 //!
 //! You can also control the type conversion process in more detail using:
 //!

From ad005543aad4b02fd3c906ea5062bd13fab130b6 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 14:41:33 -0400
Subject: [PATCH 14/15] capitalization OCD

---
 parquet/src/arrow/mod.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index e8e9f6f186e1..54314cef9a51 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -34,10 +34,10 @@
 //! the metadata in the [`ARROW_SCHEMA_META_KEY`] key which records the original Arrow
 //! schema. The metadata hint follows the same convention as arrow-cpp based
 //! implementations such as `pyarrow`. The reader looks for the schema hint in the
-//! metadata to determine Arrow types, and if it is not present, infers the arrow schema
-//! from the parquet schema.
+//! metadata to determine Arrow types, and if it is not present, infers the Arrow schema
+//! from the Parquet schema.
 //!
-//! In situations where the embedded Arrow schema is not compatible with the parquet
+//! In situations where the embedded Arrow schema is not compatible with the Parquet
 //! schema, the parquet schema takes precedence and no error is raised.
 //! See [#1663](https://github.com/apache/arrow-rs/issues/1663)
 //!

From 652937ff2e3141d65f09c0a62e0c582ef9770c7b Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 7 May 2025 14:41:54 -0400
Subject: [PATCH 15/15] capitalization OCD

---
 parquet/src/arrow/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index 54314cef9a51..76f8ef1bf068 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -38,7 +38,7 @@
 //! from the Parquet schema.
 //!
 //! In situations where the embedded Arrow schema is not compatible with the Parquet
-//! schema, the parquet schema takes precedence and no error is raised.
+//! schema, the Parquet schema takes precedence and no error is raised.
 //! See [#1663](https://github.com/apache/arrow-rs/issues/1663)
 //!
 //! You can also control the type conversion process in more detail using: