From edbc16985d735ee276e83c776f5e5989735948ca Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 27 Dec 2024 07:39:20 +0100 Subject: [PATCH 01/32] Bump griffe from 1.5.1 to 1.5.4 (#1474) Bumps [griffe](https://github.com/mkdocstrings/griffe) from 1.5.1 to 1.5.4. - [Release notes](https://github.com/mkdocstrings/griffe/releases) - [Changelog](https://github.com/mkdocstrings/griffe/blob/main/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/griffe/compare/1.5.1...1.5.4) --- updated-dependencies: - dependency-name: griffe dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- mkdocs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index bf992c03a3..45da03aa05 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -16,7 +16,7 @@ # under the License. mkdocs==1.6.1 -griffe==1.5.1 +griffe==1.5.4 jinja2==3.1.5 mkdocstrings==0.27.0 mkdocstrings-python==1.12.2 From f5bdae84f49a07056ba97db973d668a81f78f795 Mon Sep 17 00:00:00 2001 From: Tyler White <50381805+IndexSeek@users.noreply.github.com> Date: Fri, 27 Dec 2024 01:40:39 -0500 Subject: [PATCH 02/32] docs: various spelling fixes (#1471) --- mkdocs/docs/api.md | 2 +- mkdocs/docs/how-to-release.md | 2 +- mkdocs/docs/verify-release.md | 2 +- pyiceberg/table/__init__.py | 2 +- pyiceberg/utils/decimal.py | 2 +- tests/integration/test_writes/test_partitioned_writes.py | 6 +++--- tests/table/test_init.py | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 7aa4159016..9c48718877 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -1005,7 +1005,7 @@ tbl.add_files(file_paths=file_paths) ## Schema evolution -PyIceberg supports full schema evolution through the Python API. It takes care of setting the field-IDs and makes sure that only non-breaking changes are done (can be overriden). +PyIceberg supports full schema evolution through the Python API. It takes care of setting the field-IDs and makes sure that only non-breaking changes are done (can be overridden). In the examples below, the `.update_schema()` is called from the table itself. diff --git a/mkdocs/docs/how-to-release.md b/mkdocs/docs/how-to-release.md index bea5548748..c44f56a9ff 100644 --- a/mkdocs/docs/how-to-release.md +++ b/mkdocs/docs/how-to-release.md @@ -31,7 +31,7 @@ This guide outlines the process for releasing PyIceberg in accordance with the [ * A GPG key must be registered and published in the [Apache Iceberg KEYS file](https://downloads.apache.org/iceberg/KEYS). Follow [the instructions for setting up a GPG key and uploading it to the KEYS file](#set-up-gpg-key-and-upload-to-apache-iceberg-keys-file). * SVN Access - * Permission to upload artifacts to the [Apache development distribution](https://dist.apache.org/repos/dist/dev/iceberg/) (requires Apache Commmitter access). + * Permission to upload artifacts to the [Apache development distribution](https://dist.apache.org/repos/dist/dev/iceberg/) (requires Apache Committer access). * Permission to upload artifacts to the [Apache release distribution](https://dist.apache.org/repos/dist/release/iceberg/) (requires Apache PMC access). * PyPI Access * The `twine` package must be installed for uploading releases to PyPi. 
diff --git a/mkdocs/docs/verify-release.md b/mkdocs/docs/verify-release.md index 07e4c32a86..6148bfebdb 100644 --- a/mkdocs/docs/verify-release.md +++ b/mkdocs/docs/verify-release.md @@ -111,7 +111,7 @@ To run the full test coverage, with both unit tests and integration tests: make test-coverage ``` -This will spin up Docker containers to faciliate running test coverage. +This will spin up Docker containers to facilitate running test coverage. # Cast the vote diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 4ec3403bb3..2469a9ed7b 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -902,7 +902,7 @@ def scan( Args: row_filter: - A string or BooleanExpression that decsribes the + A string or BooleanExpression that describes the desired rows selected_fields: A tuple of strings representing the column names diff --git a/pyiceberg/utils/decimal.py b/pyiceberg/utils/decimal.py index 4432564dd1..99638d2a00 100644 --- a/pyiceberg/utils/decimal.py +++ b/pyiceberg/utils/decimal.py @@ -85,7 +85,7 @@ def bytes_to_decimal(value: bytes, scale: int) -> Decimal: """Return a decimal from the bytes. Args: - value (bytes): tbe bytes to be converted into a decimal. + value (bytes): the bytes to be converted into a decimal. scale (int): the scale of the decimal. Returns: diff --git a/tests/integration/test_writes/test_partitioned_writes.py b/tests/integration/test_writes/test_partitioned_writes.py index b92c338931..8a3a5c9acc 100644 --- a/tests/integration/test_writes/test_partitioned_writes.py +++ b/tests/integration/test_writes/test_partitioned_writes.py @@ -395,7 +395,7 @@ def test_dynamic_partition_overwrite_unpartitioned_evolve_to_identity_transform( # For a long string, the lower bound and upper bound is truncated # e.g. aaaaaaaaaaaaaaaaaaaaaa has lower bound of aaaaaaaaaaaaaaaa and upper bound of aaaaaaaaaaaaaaab # this makes strict metric evaluator determine the file evaluate as ROWS_MIGHT_NOT_MATCH - # this further causes the partitioned data file to be overwriten rather than deleted + # this further causes the partitioned data file to be overwritten rather than deleted if part_col == "string_long": expected_operations = ["append", "append", "overwrite", "append"] assert tbl.inspect.snapshots().to_pydict()["operation"] == expected_operations @@ -539,7 +539,7 @@ def test_data_files_with_table_partitioned_with_null( # the first snapshot generates M3 with 6 delete data entries collected from M1 and M2. # ML3 = [M3] # - # The second snapshot generates M4 with 3 appended data entries and since M3 (previous manifests) only has delte entries it does not lint to it. + # The second snapshot generates M4 with 3 appended data entries and since M3 (previous manifests) only has delete entries it does not lint to it. # ML4 = [M4] # Append : Append generates M5 with new data entries and links to all previous manifests which is M4 . @@ -552,7 +552,7 @@ def test_data_files_with_table_partitioned_with_null( # ML6 = [M6, M7, M8] # # The second snapshot generates M9 with 3 appended data entries and it also looks at manifests in ML6 (previous manifests) - # it ignores M6 since it only has delte entries but it links to M7 and M8. + # it ignores M6 since it only has delete entries but it links to M7 and M8. 
# ML7 = [M9, M7, M8] # tldr: diff --git a/tests/table/test_init.py b/tests/table/test_init.py index bdc3d030fd..397fa9f537 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -527,7 +527,7 @@ def test_update_column(table_v1: Table, table_v2: Table) -> None: new_schema = table.transaction().update_schema().update_column("y", doc=COMMENT2)._apply() assert new_schema.find_field("y").doc == COMMENT2, "failed to update existing field doc" - # update existing doc to an emtpy string + # update existing doc to an empty string assert new_schema.find_field("y").doc == COMMENT2 new_schema2 = table.transaction().update_schema().update_column("y", doc="")._apply() assert new_schema2.find_field("y").doc == "", "failed to remove existing field doc" From 6e537e86d4db52b151088f3f3fdb012ee1c3cc77 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 27 Dec 2024 08:47:10 +0100 Subject: [PATCH 03/32] Bump coverage from 7.6.9 to 7.6.10 (#1473) Bumps [coverage](https://github.com/nedbat/coveragepy) from 7.6.9 to 7.6.10. - [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/7.6.9...7.6.10) --- updated-dependencies: - dependency-name: coverage dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 126 ++++++++++++++++++++++++++-------------------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/poetry.lock b/poetry.lock index 6e4f55f39a..e6afffab09 100644 --- a/poetry.lock +++ b/poetry.lock @@ -701,73 +701,73 @@ files = [ [[package]] name = "coverage" -version = "7.6.9" +version = "7.6.10" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" files = [ - {file = "coverage-7.6.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:85d9636f72e8991a1706b2b55b06c27545448baf9f6dbf51c4004609aacd7dcb"}, - {file = "coverage-7.6.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:608a7fd78c67bee8936378299a6cb9f5149bb80238c7a566fc3e6717a4e68710"}, - {file = "coverage-7.6.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96d636c77af18b5cb664ddf12dab9b15a0cfe9c0bde715da38698c8cea748bfa"}, - {file = "coverage-7.6.9-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d75cded8a3cff93da9edc31446872d2997e327921d8eed86641efafd350e1df1"}, - {file = "coverage-7.6.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7b15f589593110ae767ce997775d645b47e5cbbf54fd322f8ebea6277466cec"}, - {file = "coverage-7.6.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:44349150f6811b44b25574839b39ae35291f6496eb795b7366fef3bd3cf112d3"}, - {file = "coverage-7.6.9-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:d891c136b5b310d0e702e186d70cd16d1119ea8927347045124cb286b29297e5"}, - {file = "coverage-7.6.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:db1dab894cc139f67822a92910466531de5ea6034ddfd2b11c0d4c6257168073"}, - {file = "coverage-7.6.9-cp310-cp310-win32.whl", hash = "sha256:41ff7b0da5af71a51b53f501a3bac65fb0ec311ebed1632e58fc6107f03b9198"}, - {file = "coverage-7.6.9-cp310-cp310-win_amd64.whl", hash = 
"sha256:35371f8438028fdccfaf3570b31d98e8d9eda8bb1d6ab9473f5a390969e98717"}, - {file = "coverage-7.6.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:932fc826442132dde42ee52cf66d941f581c685a6313feebed358411238f60f9"}, - {file = "coverage-7.6.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:085161be5f3b30fd9b3e7b9a8c301f935c8313dcf928a07b116324abea2c1c2c"}, - {file = "coverage-7.6.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ccc660a77e1c2bf24ddbce969af9447a9474790160cfb23de6be4fa88e3951c7"}, - {file = "coverage-7.6.9-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c69e42c892c018cd3c8d90da61d845f50a8243062b19d228189b0224150018a9"}, - {file = "coverage-7.6.9-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0824a28ec542a0be22f60c6ac36d679e0e262e5353203bea81d44ee81fe9c6d4"}, - {file = "coverage-7.6.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4401ae5fc52ad8d26d2a5d8a7428b0f0c72431683f8e63e42e70606374c311a1"}, - {file = "coverage-7.6.9-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:98caba4476a6c8d59ec1eb00c7dd862ba9beca34085642d46ed503cc2d440d4b"}, - {file = "coverage-7.6.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ee5defd1733fd6ec08b168bd4f5387d5b322f45ca9e0e6c817ea6c4cd36313e3"}, - {file = "coverage-7.6.9-cp311-cp311-win32.whl", hash = "sha256:f2d1ec60d6d256bdf298cb86b78dd715980828f50c46701abc3b0a2b3f8a0dc0"}, - {file = "coverage-7.6.9-cp311-cp311-win_amd64.whl", hash = "sha256:0d59fd927b1f04de57a2ba0137166d31c1a6dd9e764ad4af552912d70428c92b"}, - {file = "coverage-7.6.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:99e266ae0b5d15f1ca8d278a668df6f51cc4b854513daab5cae695ed7b721cf8"}, - {file = "coverage-7.6.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9901d36492009a0a9b94b20e52ebfc8453bf49bb2b27bca2c9706f8b4f5a554a"}, - {file = "coverage-7.6.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abd3e72dd5b97e3af4246cdada7738ef0e608168de952b837b8dd7e90341f015"}, - {file = "coverage-7.6.9-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff74026a461eb0660366fb01c650c1d00f833a086b336bdad7ab00cc952072b3"}, - {file = "coverage-7.6.9-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65dad5a248823a4996724a88eb51d4b31587aa7aa428562dbe459c684e5787ae"}, - {file = "coverage-7.6.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:22be16571504c9ccea919fcedb459d5ab20d41172056206eb2994e2ff06118a4"}, - {file = "coverage-7.6.9-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f957943bc718b87144ecaee70762bc2bc3f1a7a53c7b861103546d3a403f0a6"}, - {file = "coverage-7.6.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ae1387db4aecb1f485fb70a6c0148c6cdaebb6038f1d40089b1fc84a5db556f"}, - {file = "coverage-7.6.9-cp312-cp312-win32.whl", hash = "sha256:1a330812d9cc7ac2182586f6d41b4d0fadf9be9049f350e0efb275c8ee8eb692"}, - {file = "coverage-7.6.9-cp312-cp312-win_amd64.whl", hash = "sha256:b12c6b18269ca471eedd41c1b6a1065b2f7827508edb9a7ed5555e9a56dcfc97"}, - {file = "coverage-7.6.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:899b8cd4781c400454f2f64f7776a5d87bbd7b3e7f7bda0cb18f857bb1334664"}, - {file = "coverage-7.6.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:61f70dc68bd36810972e55bbbe83674ea073dd1dcc121040a08cdf3416c5349c"}, - {file = 
"coverage-7.6.9-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a289d23d4c46f1a82d5db4abeb40b9b5be91731ee19a379d15790e53031c014"}, - {file = "coverage-7.6.9-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e216d8044a356fc0337c7a2a0536d6de07888d7bcda76febcb8adc50bdbbd00"}, - {file = "coverage-7.6.9-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c026eb44f744acaa2bda7493dad903aa5bf5fc4f2554293a798d5606710055d"}, - {file = "coverage-7.6.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e77363e8425325384f9d49272c54045bbed2f478e9dd698dbc65dbc37860eb0a"}, - {file = "coverage-7.6.9-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:777abfab476cf83b5177b84d7486497e034eb9eaea0d746ce0c1268c71652077"}, - {file = "coverage-7.6.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:447af20e25fdbe16f26e84eb714ba21d98868705cb138252d28bc400381f6ffb"}, - {file = "coverage-7.6.9-cp313-cp313-win32.whl", hash = "sha256:d872ec5aeb086cbea771c573600d47944eea2dcba8be5f3ee649bfe3cb8dc9ba"}, - {file = "coverage-7.6.9-cp313-cp313-win_amd64.whl", hash = "sha256:fd1213c86e48dfdc5a0cc676551db467495a95a662d2396ecd58e719191446e1"}, - {file = "coverage-7.6.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ba9e7484d286cd5a43744e5f47b0b3fb457865baf07bafc6bee91896364e1419"}, - {file = "coverage-7.6.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e5ea1cf0872ee455c03e5674b5bca5e3e68e159379c1af0903e89f5eba9ccc3a"}, - {file = "coverage-7.6.9-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d10e07aa2b91835d6abec555ec8b2733347956991901eea6ffac295f83a30e4"}, - {file = "coverage-7.6.9-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:13a9e2d3ee855db3dd6ea1ba5203316a1b1fd8eaeffc37c5b54987e61e4194ae"}, - {file = "coverage-7.6.9-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c38bf15a40ccf5619fa2fe8f26106c7e8e080d7760aeccb3722664c8656b030"}, - {file = "coverage-7.6.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d5275455b3e4627c8e7154feaf7ee0743c2e7af82f6e3b561967b1cca755a0be"}, - {file = "coverage-7.6.9-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8f8770dfc6e2c6a2d4569f411015c8d751c980d17a14b0530da2d7f27ffdd88e"}, - {file = "coverage-7.6.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8d2dfa71665a29b153a9681edb1c8d9c1ea50dfc2375fb4dac99ea7e21a0bcd9"}, - {file = "coverage-7.6.9-cp313-cp313t-win32.whl", hash = "sha256:5e6b86b5847a016d0fbd31ffe1001b63355ed309651851295315031ea7eb5a9b"}, - {file = "coverage-7.6.9-cp313-cp313t-win_amd64.whl", hash = "sha256:97ddc94d46088304772d21b060041c97fc16bdda13c6c7f9d8fcd8d5ae0d8611"}, - {file = "coverage-7.6.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:adb697c0bd35100dc690de83154627fbab1f4f3c0386df266dded865fc50a902"}, - {file = "coverage-7.6.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:be57b6d56e49c2739cdf776839a92330e933dd5e5d929966fbbd380c77f060be"}, - {file = "coverage-7.6.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1592791f8204ae9166de22ba7e6705fa4ebd02936c09436a1bb85aabca3e599"}, - {file = "coverage-7.6.9-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4e12ae8cc979cf83d258acb5e1f1cf2f3f83524d1564a49d20b8bec14b637f08"}, - {file = 
"coverage-7.6.9-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb5555cff66c4d3d6213a296b360f9e1a8e323e74e0426b6c10ed7f4d021e464"}, - {file = "coverage-7.6.9-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:b9389a429e0e5142e69d5bf4a435dd688c14478a19bb901735cdf75e57b13845"}, - {file = "coverage-7.6.9-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:592ac539812e9b46046620341498caf09ca21023c41c893e1eb9dbda00a70cbf"}, - {file = "coverage-7.6.9-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a27801adef24cc30871da98a105f77995e13a25a505a0161911f6aafbd66e678"}, - {file = "coverage-7.6.9-cp39-cp39-win32.whl", hash = "sha256:8e3c3e38930cfb729cb8137d7f055e5a473ddaf1217966aa6238c88bd9fd50e6"}, - {file = "coverage-7.6.9-cp39-cp39-win_amd64.whl", hash = "sha256:e28bf44afa2b187cc9f41749138a64435bf340adfcacb5b2290c070ce99839d4"}, - {file = "coverage-7.6.9-pp39.pp310-none-any.whl", hash = "sha256:f3ca78518bc6bc92828cd11867b121891d75cae4ea9e908d72030609b996db1b"}, - {file = "coverage-7.6.9.tar.gz", hash = "sha256:4a8d8977b0c6ef5aeadcb644da9e69ae0dcfe66ec7f368c89c72e058bd71164d"}, + {file = "coverage-7.6.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5c912978f7fbf47ef99cec50c4401340436d200d41d714c7a4766f377c5b7b78"}, + {file = "coverage-7.6.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a01ec4af7dfeb96ff0078ad9a48810bb0cc8abcb0115180c6013a6b26237626c"}, + {file = "coverage-7.6.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3b204c11e2b2d883946fe1d97f89403aa1811df28ce0447439178cc7463448a"}, + {file = "coverage-7.6.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32ee6d8491fcfc82652a37109f69dee9a830e9379166cb73c16d8dc5c2915165"}, + {file = "coverage-7.6.10-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675cefc4c06e3b4c876b85bfb7c59c5e2218167bbd4da5075cbe3b5790a28988"}, + {file = "coverage-7.6.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f4f620668dbc6f5e909a0946a877310fb3d57aea8198bde792aae369ee1c23b5"}, + {file = "coverage-7.6.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:4eea95ef275de7abaef630c9b2c002ffbc01918b726a39f5a4353916ec72d2f3"}, + {file = "coverage-7.6.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e2f0280519e42b0a17550072861e0bc8a80a0870de260f9796157d3fca2733c5"}, + {file = "coverage-7.6.10-cp310-cp310-win32.whl", hash = "sha256:bc67deb76bc3717f22e765ab3e07ee9c7a5e26b9019ca19a3b063d9f4b874244"}, + {file = "coverage-7.6.10-cp310-cp310-win_amd64.whl", hash = "sha256:0f460286cb94036455e703c66988851d970fdfd8acc2a1122ab7f4f904e4029e"}, + {file = "coverage-7.6.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ea3c8f04b3e4af80e17bab607c386a830ffc2fb88a5484e1df756478cf70d1d3"}, + {file = "coverage-7.6.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:507a20fc863cae1d5720797761b42d2d87a04b3e5aeb682ef3b7332e90598f43"}, + {file = "coverage-7.6.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d37a84878285b903c0fe21ac8794c6dab58150e9359f1aaebbeddd6412d53132"}, + {file = "coverage-7.6.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a534738b47b0de1995f85f582d983d94031dffb48ab86c95bdf88dc62212142f"}, + {file = "coverage-7.6.10-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0d7a2bf79378d8fb8afaa994f91bfd8215134f8631d27eba3e0e2c13546ce994"}, + {file = "coverage-7.6.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6713ba4b4ebc330f3def51df1d5d38fad60b66720948112f114968feb52d3f99"}, + {file = "coverage-7.6.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab32947f481f7e8c763fa2c92fd9f44eeb143e7610c4ca9ecd6a36adab4081bd"}, + {file = "coverage-7.6.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7bbd8c8f1b115b892e34ba66a097b915d3871db7ce0e6b9901f462ff3a975377"}, + {file = "coverage-7.6.10-cp311-cp311-win32.whl", hash = "sha256:299e91b274c5c9cdb64cbdf1b3e4a8fe538a7a86acdd08fae52301b28ba297f8"}, + {file = "coverage-7.6.10-cp311-cp311-win_amd64.whl", hash = "sha256:489a01f94aa581dbd961f306e37d75d4ba16104bbfa2b0edb21d29b73be83609"}, + {file = "coverage-7.6.10-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:27c6e64726b307782fa5cbe531e7647aee385a29b2107cd87ba7c0105a5d3853"}, + {file = "coverage-7.6.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c56e097019e72c373bae32d946ecf9858fda841e48d82df7e81c63ac25554078"}, + {file = "coverage-7.6.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7827a5bc7bdb197b9e066cdf650b2887597ad124dd99777332776f7b7c7d0d0"}, + {file = "coverage-7.6.10-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:204a8238afe787323a8b47d8be4df89772d5c1e4651b9ffa808552bdf20e1d50"}, + {file = "coverage-7.6.10-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e67926f51821b8e9deb6426ff3164870976fe414d033ad90ea75e7ed0c2e5022"}, + {file = "coverage-7.6.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e78b270eadb5702938c3dbe9367f878249b5ef9a2fcc5360ac7bff694310d17b"}, + {file = "coverage-7.6.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:714f942b9c15c3a7a5fe6876ce30af831c2ad4ce902410b7466b662358c852c0"}, + {file = "coverage-7.6.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:abb02e2f5a3187b2ac4cd46b8ced85a0858230b577ccb2c62c81482ca7d18852"}, + {file = "coverage-7.6.10-cp312-cp312-win32.whl", hash = "sha256:55b201b97286cf61f5e76063f9e2a1d8d2972fc2fcfd2c1272530172fd28c359"}, + {file = "coverage-7.6.10-cp312-cp312-win_amd64.whl", hash = "sha256:e4ae5ac5e0d1e4edfc9b4b57b4cbecd5bc266a6915c500f358817a8496739247"}, + {file = "coverage-7.6.10-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05fca8ba6a87aabdd2d30d0b6c838b50510b56cdcfc604d40760dae7153b73d9"}, + {file = "coverage-7.6.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9e80eba8801c386f72e0712a0453431259c45c3249f0009aff537a517b52942b"}, + {file = "coverage-7.6.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a372c89c939d57abe09e08c0578c1d212e7a678135d53aa16eec4430adc5e690"}, + {file = "coverage-7.6.10-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec22b5e7fe7a0fa8509181c4aac1db48f3dd4d3a566131b313d1efc102892c18"}, + {file = "coverage-7.6.10-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26bcf5c4df41cad1b19c84af71c22cbc9ea9a547fc973f1f2cc9a290002c8b3c"}, + {file = "coverage-7.6.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e4630c26b6084c9b3cb53b15bd488f30ceb50b73c35c5ad7871b869cb7365fd"}, + {file = "coverage-7.6.10-cp313-cp313-musllinux_1_2_i686.whl", hash = 
"sha256:2396e8116db77789f819d2bc8a7e200232b7a282c66e0ae2d2cd84581a89757e"}, + {file = "coverage-7.6.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79109c70cc0882e4d2d002fe69a24aa504dec0cc17169b3c7f41a1d341a73694"}, + {file = "coverage-7.6.10-cp313-cp313-win32.whl", hash = "sha256:9e1747bab246d6ff2c4f28b4d186b205adced9f7bd9dc362051cc37c4a0c7bd6"}, + {file = "coverage-7.6.10-cp313-cp313-win_amd64.whl", hash = "sha256:254f1a3b1eef5f7ed23ef265eaa89c65c8c5b6b257327c149db1ca9d4a35f25e"}, + {file = "coverage-7.6.10-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2ccf240eb719789cedbb9fd1338055de2761088202a9a0b73032857e53f612fe"}, + {file = "coverage-7.6.10-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0c807ca74d5a5e64427c8805de15b9ca140bba13572d6d74e262f46f50b13273"}, + {file = "coverage-7.6.10-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2bcfa46d7709b5a7ffe089075799b902020b62e7ee56ebaed2f4bdac04c508d8"}, + {file = "coverage-7.6.10-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4e0de1e902669dccbf80b0415fb6b43d27edca2fbd48c74da378923b05316098"}, + {file = "coverage-7.6.10-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7b444c42bbc533aaae6b5a2166fd1a797cdb5eb58ee51a92bee1eb94a1e1cb"}, + {file = "coverage-7.6.10-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b330368cb99ef72fcd2dc3ed260adf67b31499584dc8a20225e85bfe6f6cfed0"}, + {file = "coverage-7.6.10-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:9a7cfb50515f87f7ed30bc882f68812fd98bc2852957df69f3003d22a2aa0abf"}, + {file = "coverage-7.6.10-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f93531882a5f68c28090f901b1d135de61b56331bba82028489bc51bdd818d2"}, + {file = "coverage-7.6.10-cp313-cp313t-win32.whl", hash = "sha256:89d76815a26197c858f53c7f6a656686ec392b25991f9e409bcef020cd532312"}, + {file = "coverage-7.6.10-cp313-cp313t-win_amd64.whl", hash = "sha256:54a5f0f43950a36312155dae55c505a76cd7f2b12d26abeebbe7a0b36dbc868d"}, + {file = "coverage-7.6.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:656c82b8a0ead8bba147de9a89bda95064874c91a3ed43a00e687f23cc19d53a"}, + {file = "coverage-7.6.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ccc2b70a7ed475c68ceb548bf69cec1e27305c1c2606a5eb7c3afff56a1b3b27"}, + {file = "coverage-7.6.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5e37dc41d57ceba70956fa2fc5b63c26dba863c946ace9705f8eca99daecdc4"}, + {file = "coverage-7.6.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0aa9692b4fdd83a4647eeb7db46410ea1322b5ed94cd1715ef09d1d5922ba87f"}, + {file = "coverage-7.6.10-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa744da1820678b475e4ba3dfd994c321c5b13381d1041fe9c608620e6676e25"}, + {file = "coverage-7.6.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c0b1818063dc9e9d838c09e3a473c1422f517889436dd980f5d721899e66f315"}, + {file = "coverage-7.6.10-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:59af35558ba08b758aec4d56182b222976330ef8d2feacbb93964f576a7e7a90"}, + {file = "coverage-7.6.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7ed2f37cfce1ce101e6dffdfd1c99e729dd2ffc291d02d3e2d0af8b53d13840d"}, + {file = "coverage-7.6.10-cp39-cp39-win32.whl", hash = "sha256:4bcc276261505d82f0ad426870c3b12cb177752834a633e737ec5ee79bbdff18"}, + {file = 
"coverage-7.6.10-cp39-cp39-win_amd64.whl", hash = "sha256:457574f4599d2b00f7f637a0700a6422243b3565509457b2dbd3f50703e11f59"}, + {file = "coverage-7.6.10-pp39.pp310-none-any.whl", hash = "sha256:fd34e7b3405f0cc7ab03d54a334c17a9e802897580d964bd8c2001f4b9fd488f"}, + {file = "coverage-7.6.10.tar.gz", hash = "sha256:7fb105327c8f8f0682e29843e2ff96af9dcbe5bab8eeb4b398c6a33a16d80a23"}, ] [package.dependencies] From a5be07a2c0544876abb02e767dd4cabc3d69128d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 27 Dec 2024 11:42:55 +0100 Subject: [PATCH 04/32] Bump mkdocstrings-python from 1.12.2 to 1.13.0 (#1472) Bumps [mkdocstrings-python](https://github.com/mkdocstrings/python) from 1.12.2 to 1.13.0. - [Release notes](https://github.com/mkdocstrings/python/releases) - [Changelog](https://github.com/mkdocstrings/python/blob/main/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/python/compare/1.12.2...1.13.0) --- updated-dependencies: - dependency-name: mkdocstrings-python dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- mkdocs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index 45da03aa05..f374b85bea 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -19,7 +19,7 @@ mkdocs==1.6.1 griffe==1.5.4 jinja2==3.1.5 mkdocstrings==0.27.0 -mkdocstrings-python==1.12.2 +mkdocstrings-python==1.13.0 mkdocs-literate-nav==0.6.1 mkdocs-autorefs==1.2.0 mkdocs-gen-files==0.5.0 From a926d379e6d14ec5898aedc16aa5ac3e57e9ed2f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 28 Dec 2024 17:23:07 +0100 Subject: [PATCH 05/32] Bump boto3 from 1.35.81 to 1.35.88 (#1476) Bumps [boto3](https://github.com/boto/boto3) from 1.35.81 to 1.35.88. - [Release notes](https://github.com/boto/boto3/releases) - [Commits](https://github.com/boto/boto3/compare/1.35.81...1.35.88) --- updated-dependencies: - dependency-name: boto3 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/poetry.lock b/poetry.lock index e6afffab09..893f5a4a9e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -25,24 +25,24 @@ tests = ["arrow", "dask[dataframe]", "docker", "pytest", "pytest-mock"] [[package]] name = "aiobotocore" -version = "2.16.0" +version = "2.16.1" description = "Async client for aws services using botocore and aiohttp" optional = true python-versions = ">=3.8" files = [ - {file = "aiobotocore-2.16.0-py3-none-any.whl", hash = "sha256:eb3641a7b9c51113adbc33a029441de6201ebb026c64ff2e149c7fa802c9abfc"}, - {file = "aiobotocore-2.16.0.tar.gz", hash = "sha256:6d6721961a81570e9b920b98778d95eec3d52a9f83b7844c6c5cfdbf2a2d6a11"}, + {file = "aiobotocore-2.16.1-py3-none-any.whl", hash = "sha256:e7cf6295471224c82a111deaf31c2c3a4bcd6dbd6973e75c7fc4739fcccd5b0b"}, + {file = "aiobotocore-2.16.1.tar.gz", hash = "sha256:0f94904c6a1d14d5aac0502fcc1d721b95ee60d46d8a0e546f6203de0410d522"}, ] [package.dependencies] aiohttp = ">=3.9.2,<4.0.0" aioitertools = ">=0.5.1,<1.0.0" -botocore = ">=1.35.74,<1.35.82" +botocore = ">=1.35.74,<1.35.89" wrapt = ">=1.10.10,<2.0.0" [package.extras] -awscli = ["awscli (>=1.36.15,<1.36.23)"] -boto3 = ["boto3 (>=1.35.74,<1.35.82)"] +awscli = ["awscli (>=1.36.15,<1.36.30)"] +boto3 = ["boto3 (>=1.35.74,<1.35.89)"] [[package]] name = "aiohappyeyeballs" @@ -358,17 +358,17 @@ files = [ [[package]] name = "boto3" -version = "1.35.81" +version = "1.35.88" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.35.81-py3-none-any.whl", hash = "sha256:742941b2424c0223d2d94a08c3485462fa7c58d816b62ca80f08e555243acee1"}, - {file = "boto3-1.35.81.tar.gz", hash = "sha256:d2e95fa06f095b8e0c545dd678c6269d253809b2997c30f5ce8a956c410b4e86"}, + {file = "boto3-1.35.88-py3-none-any.whl", hash = "sha256:7bc9b27ad87607256470c70a86c8b8c319ddd6ecae89cc191687cbf8ccb7b6a6"}, + {file = "boto3-1.35.88.tar.gz", hash = "sha256:43c6a7a70bb226770a82a601870136e3bb3bf2808f4576ab5b9d7d140dbf1323"}, ] [package.dependencies] -botocore = ">=1.35.81,<1.36.0" +botocore = ">=1.35.88,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -377,13 +377,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.81" +version = "1.35.88" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.35.81-py3-none-any.whl", hash = "sha256:a7b13bbd959bf2d6f38f681676aab408be01974c46802ab997617b51399239f7"}, - {file = "botocore-1.35.81.tar.gz", hash = "sha256:564c2478e50179e0b766e6a87e5e0cdd35e1bc37eb375c1cf15511f5dd13600d"}, + {file = "botocore-1.35.88-py3-none-any.whl", hash = "sha256:e60cc3fbe8d7a10f70e7e852d76be2b29f23ead418a5899d366ea32b1eacb5a5"}, + {file = "botocore-1.35.88.tar.gz", hash = "sha256:58dcd9a464c354b8c6c25261d8de830d175d9739eae568bf0c52e57116fb03c6"}, ] [package.dependencies] From e6465001bd8a47718ff79da4def5800962e6b895 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 29 Dec 2024 06:37:14 +0100 Subject: [PATCH 06/32] Bump moto from 5.0.24 to 5.0.25 (#1475) Bumps [moto](https://github.com/getmoto/moto) from 5.0.24 to 5.0.25. 
- [Release notes](https://github.com/getmoto/moto/releases) - [Changelog](https://github.com/getmoto/moto/blob/master/CHANGELOG.md) - [Commits](https://github.com/getmoto/moto/compare/5.0.24...5.0.25) --- updated-dependencies: - dependency-name: moto dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 53 +++++++++++++++++------------------------------------ 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/poetry.lock b/poetry.lock index 893f5a4a9e..640cab2733 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1874,23 +1874,6 @@ cryptography = "*" [package.extras] drafts = ["pycryptodome"] -[[package]] -name = "jsondiff" -version = "2.2.1" -description = "Diff JSON and JSON-like structures in Python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "jsondiff-2.2.1-py3-none-any.whl", hash = "sha256:b1f0f7e2421881848b1d556d541ac01a91680cfcc14f51a9b62cdf4da0e56722"}, - {file = "jsondiff-2.2.1.tar.gz", hash = "sha256:658d162c8a86ba86de26303cd86a7b37e1b2c1ec98b569a60e2ca6180545f7fe"}, -] - -[package.dependencies] -pyyaml = "*" - -[package.extras] -dev = ["build", "hypothesis", "pytest", "setuptools-scm"] - [[package]] name = "jsonpatch" version = "1.33" @@ -2249,13 +2232,13 @@ type = ["mypy (==1.11.2)"] [[package]] name = "moto" -version = "5.0.24" -description = "" +version = "5.0.25" +description = "A library that allows you to easily mock out tests based on AWS infrastructure" optional = false python-versions = ">=3.8" files = [ - {file = "moto-5.0.24-py3-none-any.whl", hash = "sha256:4d826f1574849f18ddd2fcbf614d97f82c8fddfb9d95fac1078da01a39b57c10"}, - {file = "moto-5.0.24.tar.gz", hash = "sha256:dba6426bd770fbb9d892633fbd35253cbc181eeaa0eba97d6f058720a8fe9b42"}, + {file = "moto-5.0.25-py3-none-any.whl", hash = "sha256:ab790f9d7d08f30667a196af7cacead03e76c10be2d1148ea00a731d47918a1e"}, + {file = "moto-5.0.25.tar.gz", hash = "sha256:deea8b158cec5a65c9635ae1fff4579d735b11ac8a0e5226fbbeb742ce0ce6b2"}, ] [package.dependencies] @@ -2271,10 +2254,9 @@ flask-cors = {version = "*", optional = true, markers = "extra == \"server\""} graphql-core = {version = "*", optional = true, markers = "extra == \"server\""} Jinja2 = ">=2.10.1" joserfc = {version = ">=0.9.0", optional = true, markers = "extra == \"server\""} -jsondiff = {version = ">=1.1.2", optional = true, markers = "extra == \"server\""} jsonpath-ng = {version = "*", optional = true, markers = "extra == \"server\""} openapi-spec-validator = {version = ">=0.5.0", optional = true, markers = "extra == \"server\""} -py-partiql-parser = {version = "0.5.6", optional = true, markers = "extra == \"server\""} +py-partiql-parser = {version = "0.6.1", optional = true, markers = "extra == \"server\""} pyparsing = {version = ">=3.0.7", optional = true, markers = "extra == \"server\""} python-dateutil = ">=2.1,<3.0.0" PyYAML = {version = ">=5.1", optional = true, markers = "extra == \"server\""} @@ -2285,25 +2267,24 @@ werkzeug = ">=0.5,<2.2.0 || >2.2.0,<2.2.1 || >2.2.1" xmltodict = "*" [package.extras] -all = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "jsonschema", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.6)", "pyparsing (>=3.0.7)", "setuptools"] +all = ["PyYAML (>=5.1)", 
"antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsonpath-ng", "jsonschema", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.6.1)", "pyparsing (>=3.0.7)", "setuptools"] apigateway = ["PyYAML (>=5.1)", "joserfc (>=0.9.0)", "openapi-spec-validator (>=0.5.0)"] apigatewayv2 = ["PyYAML (>=5.1)", "openapi-spec-validator (>=0.5.0)"] appsync = ["graphql-core"] awslambda = ["docker (>=3.0.0)"] batch = ["docker (>=3.0.0)"] -cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.6)", "pyparsing (>=3.0.7)", "setuptools"] +cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.6.1)", "pyparsing (>=3.0.7)", "setuptools"] cognitoidp = ["joserfc (>=0.9.0)"] -dynamodb = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.6)"] -dynamodbstreams = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.6)"] +dynamodb = ["docker (>=3.0.0)", "py-partiql-parser (==0.6.1)"] +dynamodbstreams = ["docker (>=3.0.0)", "py-partiql-parser (==0.6.1)"] events = ["jsonpath-ng"] glue = ["pyparsing (>=3.0.7)"] -iotdata = ["jsondiff (>=1.1.2)"] -proxy = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.6)", "pyparsing (>=3.0.7)", "setuptools"] +proxy = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "graphql-core", "joserfc (>=0.9.0)", "jsonpath-ng", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.6.1)", "pyparsing (>=3.0.7)", "setuptools"] quicksight = ["jsonschema"] -resourcegroupstaggingapi = ["PyYAML (>=5.1)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.6)", "pyparsing (>=3.0.7)"] -s3 = ["PyYAML (>=5.1)", "py-partiql-parser (==0.5.6)"] -s3crc32c = ["PyYAML (>=5.1)", "crc32c", "py-partiql-parser (==0.5.6)"] -server = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.6)", "pyparsing (>=3.0.7)", "setuptools"] +resourcegroupstaggingapi = ["PyYAML (>=5.1)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.6.1)", "pyparsing (>=3.0.7)"] +s3 = ["PyYAML (>=5.1)", "py-partiql-parser (==0.6.1)"] +s3crc32c = ["PyYAML (>=5.1)", "crc32c", "py-partiql-parser (==0.6.1)"] +server = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "joserfc (>=0.9.0)", "jsonpath-ng", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.6.1)", "pyparsing (>=3.0.7)", "setuptools"] ssm = ["PyYAML (>=5.1)"] stepfunctions = 
["antlr4-python3-runtime", "jsonpath-ng"] xray = ["aws-xray-sdk (>=0.93,!=0.96)", "setuptools"] @@ -3068,13 +3049,13 @@ files = [ [[package]] name = "py-partiql-parser" -version = "0.5.6" +version = "0.6.1" description = "Pure Python PartiQL Parser" optional = false python-versions = "*" files = [ - {file = "py_partiql_parser-0.5.6-py2.py3-none-any.whl", hash = "sha256:622d7b0444becd08c1f4e9e73b31690f4b1c309ab6e5ed45bf607fe71319309f"}, - {file = "py_partiql_parser-0.5.6.tar.gz", hash = "sha256:6339f6bf85573a35686529fc3f491302e71dd091711dfe8df3be89a93767f97b"}, + {file = "py_partiql_parser-0.6.1-py2.py3-none-any.whl", hash = "sha256:ff6a48067bff23c37e9044021bf1d949c83e195490c17e020715e927fe5b2456"}, + {file = "py_partiql_parser-0.6.1.tar.gz", hash = "sha256:8583ff2a0e15560ef3bc3df109a7714d17f87d81d33e8c38b7fed4e58a63215d"}, ] [package.extras] From 5da1f4d6b66cdc689e561d6291abbb757ffa561a Mon Sep 17 00:00:00 2001 From: smaheshwar-pltr Date: Thu, 2 Jan 2025 15:06:20 -0500 Subject: [PATCH 07/32] URL-encode partition field names in file locations (#1457) * URL-encode partition field names in file locations * Separate into variable * Add test * Revert to main * Failing test * Disable justication from test * Use `quote_plus` instead of `quote` to match Java behaviour * Temporarily update test to pass * Uncomment test * Add unit test * Fix typo in comment * Add `make_name_compatible` suggestion so test passes * Fix typo in schema field name --------- Co-authored-by: Sreesh Maheshwar --- pyiceberg/partitioning.py | 8 ++- tests/integration/test_partitioning_key.py | 67 +++++++++++++++++++++- tests/table/test_partitioning.py | 24 +++++++- 3 files changed, 92 insertions(+), 7 deletions(-) diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py index 5f9178ebf9..c9b6316f59 100644 --- a/pyiceberg/partitioning.py +++ b/pyiceberg/partitioning.py @@ -30,7 +30,7 @@ Tuple, TypeVar, ) -from urllib.parse import quote +from urllib.parse import quote_plus from pydantic import ( BeforeValidator, @@ -234,9 +234,11 @@ def partition_to_path(self, data: Record, schema: Schema) -> str: partition_field = self.fields[pos] value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=data[pos]) - value_str = quote(value_str, safe="") + value_str = quote_plus(value_str, safe="") value_strs.append(value_str) - field_strs.append(partition_field.name) + + field_str = quote_plus(partition_field.name, safe="") + field_strs.append(field_str) path = "/".join([field_str + "=" + value_str for field_str, value_str in zip(field_strs, value_strs)]) return path diff --git a/tests/integration/test_partitioning_key.py b/tests/integration/test_partitioning_key.py index 29f664909c..1ac808c7d0 100644 --- a/tests/integration/test_partitioning_key.py +++ b/tests/integration/test_partitioning_key.py @@ -18,7 +18,7 @@ import uuid from datetime import date, datetime, timedelta, timezone from decimal import Decimal -from typing import Any, List +from typing import Any, Callable, List, Optional import pytest from pyspark.sql import SparkSession @@ -70,6 +70,7 @@ NestedField(field_id=12, name="fixed_field", field_type=FixedType(16), required=False), NestedField(field_id=13, name="decimal_field", field_type=DecimalType(5, 2), required=False), NestedField(field_id=14, name="uuid_field", field_type=UUIDType(), required=False), + NestedField(field_id=15, name="special#string+field", field_type=StringType(), required=False), ) @@ -77,7 +78,7 @@ @pytest.mark.parametrize( - "partition_fields, partition_values, 
expected_partition_record, expected_hive_partition_path_slice, spark_create_table_sql_for_justification, spark_data_insert_sql_for_justification", + "partition_fields, partition_values, expected_partition_record, expected_hive_partition_path_slice, spark_create_table_sql_for_justification, spark_data_insert_sql_for_justification, make_compatible_name", [ # # Identity Transform ( @@ -98,6 +99,7 @@ VALUES (false, 'Boolean field set to false'); """, + None, ), ( [PartitionField(source_id=2, field_id=1001, transform=IdentityTransform(), name="string_field")], @@ -117,6 +119,7 @@ VALUES ('sample_string', 'Another string value') """, + None, ), ( [PartitionField(source_id=4, field_id=1001, transform=IdentityTransform(), name="int_field")], @@ -136,6 +139,7 @@ VALUES (42, 'Associated string value for int 42') """, + None, ), ( [PartitionField(source_id=5, field_id=1001, transform=IdentityTransform(), name="long_field")], @@ -155,6 +159,7 @@ VALUES (1234567890123456789, 'Associated string value for long 1234567890123456789') """, + None, ), ( [PartitionField(source_id=6, field_id=1001, transform=IdentityTransform(), name="float_field")], @@ -178,6 +183,7 @@ # VALUES # (3.14, 'Associated string value for float 3.14') # """ + None, ), ( [PartitionField(source_id=7, field_id=1001, transform=IdentityTransform(), name="double_field")], @@ -201,6 +207,7 @@ # VALUES # (6.282, 'Associated string value for double 6.282') # """ + None, ), ( [PartitionField(source_id=8, field_id=1001, transform=IdentityTransform(), name="timestamp_field")], @@ -220,6 +227,7 @@ VALUES (CAST('2023-01-01 12:00:01.000999' AS TIMESTAMP_NTZ), 'Associated string value for timestamp 2023-01-01T12:00:00') """, + None, ), ( [PartitionField(source_id=8, field_id=1001, transform=IdentityTransform(), name="timestamp_field")], @@ -239,6 +247,7 @@ VALUES (CAST('2023-01-01 12:00:01' AS TIMESTAMP_NTZ), 'Associated string value for timestamp 2023-01-01T12:00:00') """, + None, ), ( [PartitionField(source_id=8, field_id=1001, transform=IdentityTransform(), name="timestamp_field")], @@ -263,6 +272,7 @@ # VALUES # (CAST('2023-01-01 12:00:00' AS TIMESTAMP_NTZ), 'Associated string value for timestamp 2023-01-01T12:00:00') # """ + None, ), ( [PartitionField(source_id=9, field_id=1001, transform=IdentityTransform(), name="timestamptz_field")], @@ -287,6 +297,7 @@ # VALUES # (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Associated string value for timestamp 2023-01-01 12:00:01.000999+03:00') # """ + None, ), ( [PartitionField(source_id=10, field_id=1001, transform=IdentityTransform(), name="date_field")], @@ -306,6 +317,7 @@ VALUES (CAST('2023-01-01' AS DATE), 'Associated string value for date 2023-01-01') """, + None, ), ( [PartitionField(source_id=14, field_id=1001, transform=IdentityTransform(), name="uuid_field")], @@ -325,6 +337,7 @@ VALUES ('f47ac10b-58cc-4372-a567-0e02b2c3d479', 'Associated string value for UUID f47ac10b-58cc-4372-a567-0e02b2c3d479') """, + None, ), ( [PartitionField(source_id=11, field_id=1001, transform=IdentityTransform(), name="binary_field")], @@ -344,6 +357,7 @@ VALUES (CAST('example' AS BINARY), 'Associated string value for binary `example`') """, + None, ), ( [PartitionField(source_id=13, field_id=1001, transform=IdentityTransform(), name="decimal_field")], @@ -363,6 +377,7 @@ VALUES (123.45, 'Associated string value for decimal 123.45') """, + None, ), # # Year Month Day Hour Transform # Month Transform @@ -384,6 +399,7 @@ VALUES (CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP_NTZ), 'Event at 2023-01-01 
11:55:59.999999'); """, + None, ), ( [PartitionField(source_id=9, field_id=1001, transform=MonthTransform(), name="timestamptz_field_month")], @@ -403,6 +419,7 @@ VALUES (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Event at 2023-01-01 12:00:01.000999+03:00'); """, + None, ), ( [PartitionField(source_id=10, field_id=1001, transform=MonthTransform(), name="date_field_month")], @@ -422,6 +439,7 @@ VALUES (CAST('2023-01-01' AS DATE), 'Event on 2023-01-01'); """, + None, ), # Year Transform ( @@ -442,6 +460,7 @@ VALUES (CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP), 'Event at 2023-01-01 11:55:59.999999'); """, + None, ), ( [PartitionField(source_id=9, field_id=1001, transform=YearTransform(), name="timestamptz_field_year")], @@ -461,6 +480,7 @@ VALUES (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Event at 2023-01-01 12:00:01.000999+03:00'); """, + None, ), ( [PartitionField(source_id=10, field_id=1001, transform=YearTransform(), name="date_field_year")], @@ -480,6 +500,7 @@ VALUES (CAST('2023-01-01' AS DATE), 'Event on 2023-01-01'); """, + None, ), # # Day Transform ( @@ -500,6 +521,7 @@ VALUES (CAST('2023-01-01' AS DATE), 'Event on 2023-01-01'); """, + None, ), ( [PartitionField(source_id=9, field_id=1001, transform=DayTransform(), name="timestamptz_field_day")], @@ -519,6 +541,7 @@ VALUES (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Event at 2023-01-01 12:00:01.000999+03:00'); """, + None, ), ( [PartitionField(source_id=10, field_id=1001, transform=DayTransform(), name="date_field_day")], @@ -538,6 +561,7 @@ VALUES (CAST('2023-01-01' AS DATE), 'Event on 2023-01-01'); """, + None, ), # Hour Transform ( @@ -558,6 +582,7 @@ VALUES (CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP), 'Event within the 11th hour of 2023-01-01'); """, + None, ), ( [PartitionField(source_id=9, field_id=1001, transform=HourTransform(), name="timestamptz_field_hour")], @@ -577,6 +602,7 @@ VALUES (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Event at 2023-01-01 12:00:01.000999+03:00'); """, + None, ), # Truncate Transform ( @@ -597,6 +623,7 @@ VALUES (12345, 'Sample data for int'); """, + None, ), ( [PartitionField(source_id=5, field_id=1001, transform=TruncateTransform(2), name="bigint_field_trunc")], @@ -616,6 +643,7 @@ VALUES (4294967297, 'Sample data for long'); """, + None, ), ( [PartitionField(source_id=2, field_id=1001, transform=TruncateTransform(3), name="string_field_trunc")], @@ -635,6 +663,7 @@ VALUES ('abcdefg', 'Another sample for string'); """, + None, ), ( [PartitionField(source_id=13, field_id=1001, transform=TruncateTransform(width=5), name="decimal_field_trunc")], @@ -654,6 +683,7 @@ VALUES (678.90, 'Associated string value for decimal 678.90') """, + None, ), ( [PartitionField(source_id=11, field_id=1001, transform=TruncateTransform(10), name="binary_field_trunc")], @@ -673,6 +703,7 @@ VALUES (binary('HELLOICEBERG'), 'Sample data for binary'); """, + None, ), # Bucket Transform ( @@ -693,6 +724,7 @@ VALUES (10, 'Integer with value 10'); """, + None, ), # Test multiple field combinations could generate the Partition record and hive partition path correctly ( @@ -721,6 +753,27 @@ VALUES (CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP), CAST('2023-01-01' AS DATE), 'some data'); """, + None, + ), + # Test that special characters are URL-encoded + ( + [PartitionField(source_id=15, field_id=1001, transform=IdentityTransform(), name="special#string+field")], + ["special string"], + Record(**{"special#string+field": "special string"}), # type: ignore + 
"special%23string%2Bfield=special+string", + f"""CREATE TABLE {identifier} ( + `special#string+field` string + ) + USING iceberg + PARTITIONED BY ( + identity(`special#string+field`) + ) + """, + f"""INSERT INTO {identifier} + VALUES + ('special string') + """, + lambda name: name.replace("#", "_x23").replace("+", "_x2B"), ), ], ) @@ -734,6 +787,7 @@ def test_partition_key( expected_hive_partition_path_slice: str, spark_create_table_sql_for_justification: str, spark_data_insert_sql_for_justification: str, + make_compatible_name: Optional[Callable[[str], str]], ) -> None: partition_field_values = [PartitionFieldValue(field, value) for field, value in zip(partition_fields, partition_values)] spec = PartitionSpec(*partition_fields) @@ -768,5 +822,12 @@ def test_partition_key( spark_path_for_justification = ( snapshot.manifests(iceberg_table.io)[0].fetch_manifest_entry(iceberg_table.io)[0].data_file.file_path ) - assert spark_partition_for_justification == expected_partition_record + # Special characters in partition value are sanitized when written to the data file's partition field + # Use `make_compatible_name` to match the sanitize behavior + sanitized_record = ( + Record(**{make_compatible_name(k): v for k, v in vars(expected_partition_record).items()}) + if make_compatible_name + else expected_partition_record + ) + assert spark_partition_for_justification == sanitized_record assert expected_hive_partition_path_slice in spark_path_for_justification diff --git a/tests/table/test_partitioning.py b/tests/table/test_partitioning.py index d7425bc351..127d57a798 100644 --- a/tests/table/test_partitioning.py +++ b/tests/table/test_partitioning.py @@ -16,7 +16,8 @@ # under the License. from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionField, PartitionSpec from pyiceberg.schema import Schema -from pyiceberg.transforms import BucketTransform, TruncateTransform +from pyiceberg.transforms import BucketTransform, IdentityTransform, TruncateTransform +from pyiceberg.typedef import Record from pyiceberg.types import ( IntegerType, NestedField, @@ -118,6 +119,27 @@ def test_deserialize_partition_spec() -> None: ) +def test_partition_spec_to_path() -> None: + schema = Schema( + NestedField(field_id=1, name="str", field_type=StringType(), required=False), + NestedField(field_id=2, name="other_str", field_type=StringType(), required=False), + NestedField(field_id=3, name="int", field_type=IntegerType(), required=True), + ) + + spec = PartitionSpec( + PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=19), name="my#str%bucket"), + PartitionField(source_id=2, field_id=1001, transform=IdentityTransform(), name="other str+bucket"), + PartitionField(source_id=3, field_id=1002, transform=BucketTransform(num_buckets=25), name="my!int:bucket"), + spec_id=3, + ) + + record = Record(**{"my#str%bucket": "my+str", "other str+bucket": "( )", "my!int:bucket": 10}) # type: ignore + + # Both partition field names and values should be URL encoded, with spaces mapping to plus signs, to match the Java + # behaviour: https://github.com/apache/iceberg/blob/ca3db931b0f024f0412084751ac85dd4ef2da7e7/api/src/main/java/org/apache/iceberg/PartitionSpec.java#L198-L204 + assert spec.partition_to_path(record, schema) == "my%23str%25bucket=my%2Bstr/other+str%2Bbucket=%28+%29/my%21int%3Abucket=10" + + def test_partition_type(table_schema_simple: Schema) -> None: spec = PartitionSpec( PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=19), name="str_truncate"), 
From f7d8a2f50a7e8caafd04ff8ec3c08e113a5b71b3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 Jan 2025 08:39:12 +0100 Subject: [PATCH 08/32] Bump pyparsing from 3.2.0 to 3.2.1 (#1481) Bumps [pyparsing](https://github.com/pyparsing/pyparsing) from 3.2.0 to 3.2.1. - [Release notes](https://github.com/pyparsing/pyparsing/releases) - [Changelog](https://github.com/pyparsing/pyparsing/blob/master/CHANGES) - [Commits](https://github.com/pyparsing/pyparsing/compare/3.2.0...3.2.1) --- updated-dependencies: - dependency-name: pyparsing dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 640cab2733..4fd524bb3f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3330,13 +3330,13 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] [[package]] name = "pyparsing" -version = "3.2.0" +version = "3.2.1" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false python-versions = ">=3.9" files = [ - {file = "pyparsing-3.2.0-py3-none-any.whl", hash = "sha256:93d9577b88da0bbea8cc8334ee8b918ed014968fd2ec383e868fb8afb1ccef84"}, - {file = "pyparsing-3.2.0.tar.gz", hash = "sha256:cbf74e27246d595d9a74b186b810f6fbb86726dbf3b9532efb343f6d7294fe9c"}, + {file = "pyparsing-3.2.1-py3-none-any.whl", hash = "sha256:506ff4f4386c4cec0590ec19e6302d3aedb992fdc02c761e90416f158dacf8e1"}, + {file = "pyparsing-3.2.1.tar.gz", hash = "sha256:61980854fd66de3a90028d679a954d5f2623e83144b5afe5ee86f43d762e5f0a"}, ] [package.extras] From f863c4e7cde850ec23111d45105351b314716e3a Mon Sep 17 00:00:00 2001 From: Tyler White <50381805+IndexSeek@users.noreply.github.com> Date: Fri, 3 Jan 2025 14:43:07 -0500 Subject: [PATCH 09/32] Configure `codespell` in `pre-commit` (#1478) * feat: configure codespell in pre-commit * add apache license header * style: resolve pre-commit violations --- .codespellrc | 18 ++++++++++++++++++ .pre-commit-config.yaml | 4 ++++ pyiceberg/avro/reader.py | 2 +- pyiceberg/io/pyarrow.py | 2 +- pyiceberg/utils/singleton.py | 2 +- tests/test_transforms.py | 2 +- 6 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 .codespellrc diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 0000000000..a38787e126 --- /dev/null +++ b/.codespellrc @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+[codespell] +ignore-words-list = BoundIn,fo,MoR,NotIn,notIn,oT diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c0b9a31792..bdd1f362b5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -69,6 +69,10 @@ repos: # --line-length is set to a high value to deal with very long lines - --line-length - '99999' + - repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell ci: autofix_commit_msg: | [pre-commit.ci] auto fixes from pre-commit.com hooks diff --git a/pyiceberg/avro/reader.py b/pyiceberg/avro/reader.py index 988bd42ba4..a5578680d6 100644 --- a/pyiceberg/avro/reader.py +++ b/pyiceberg/avro/reader.py @@ -51,7 +51,7 @@ def _skip_map_array(decoder: BinaryDecoder, skip_entry: Callable[[], None]) -> None: """Skips over an array or map. - Both the array and map are encoded similar, and we can re-use + Both the array and map are encoded similar, and we can reuse the logic of skipping in an efficient way. From the Avro spec: diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index ef6937f1bb..e8c9f64d63 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -1536,7 +1536,7 @@ def _to_requested_schema( include_field_ids: bool = False, use_large_types: bool = True, ) -> pa.RecordBatch: - # We could re-use some of these visitors + # We could reuse some of these visitors struct_array = visit_with_partner( requested_schema, batch, diff --git a/pyiceberg/utils/singleton.py b/pyiceberg/utils/singleton.py index 8a4bbf91ce..06ee62febe 100644 --- a/pyiceberg/utils/singleton.py +++ b/pyiceberg/utils/singleton.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. """ -This is a singleton metaclass that can be used to cache and re-use existing objects. +This is a singleton metaclass that can be used to cache and reuse existing objects. In the Iceberg codebase we have a lot of objects that are stateless (for example Types such as StringType, BooleanType etc). FixedTypes have arguments (eg. Fixed[22]) that we also make part of the key when caching diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 7ebab87e3a..6d04a1e4ce 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -899,7 +899,7 @@ def test_projection_truncate_string_set_same_result(bound_reference_str: BoundRe def test_projection_truncate_string_set_in(bound_reference_str: BoundReference[str]) -> None: assert TruncateTransform(3).project( "name", BoundIn(term=bound_reference_str, literals={literal("hello"), literal("world")}) - ) == In(term="name", literals={literal("hel"), literal("wor")}) + ) == In(term="name", literals={literal("hel"), literal("wor")}) # codespell:ignore hel def test_projection_truncate_string_set_not_in(bound_reference_str: BoundReference[str]) -> None: From acd6f5a8a19db709e835e2686b87d4db3dca254f Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 3 Jan 2025 15:22:02 -0500 Subject: [PATCH 10/32] Remove deprecation warnings (#1416) * tests/expressions/test_parser.py::test_is_null Deprecated in 0.8.0, will be removed in 0.9.0. Parsing expressions with table name is deprecated. Only provide field names in the row_filter. * tests/catalog/test_rest.py: Deprecated in 0.8.0, will be removed in 1.0.0. 
Iceberg REST client is missing the OAuth2 server URI configuration --- pyiceberg/utils/deprecated.py | 1 - tests/catalog/test_rest.py | 33 ++++++++++++++++++++++++++++++++ tests/expressions/test_parser.py | 1 - 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/pyiceberg/utils/deprecated.py b/pyiceberg/utils/deprecated.py index da2cb3b500..b196f47ec6 100644 --- a/pyiceberg/utils/deprecated.py +++ b/pyiceberg/utils/deprecated.py @@ -56,7 +56,6 @@ def deprecation_message(deprecated_in: str, removed_in: str, help_message: Optio def _deprecation_warning(message: str) -> None: with warnings.catch_warnings(): # temporarily override warning handling - warnings.simplefilter("always", DeprecationWarning) # turn off filter warnings.warn( message, category=DeprecationWarning, diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 091a67166b..2a4b3a7a1f 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -121,6 +121,9 @@ def test_no_uri_supplied() -> None: RestCatalog("production") +@pytest.mark.filterwarnings( + "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" +) def test_token_200(rest_mock: Mocker) -> None: rest_mock.post( f"{TEST_URI}v1/oauth/tokens", @@ -141,6 +144,9 @@ def test_token_200(rest_mock: Mocker) -> None: ) +@pytest.mark.filterwarnings( + "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" +) def test_token_200_without_optional_fields(rest_mock: Mocker) -> None: rest_mock.post( f"{TEST_URI}v1/oauth/tokens", @@ -157,6 +163,9 @@ def test_token_200_without_optional_fields(rest_mock: Mocker) -> None: ) +@pytest.mark.filterwarnings( + "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" +) def test_token_with_optional_oauth_params(rest_mock: Mocker) -> None: mock_request = rest_mock.post( f"{TEST_URI}v1/oauth/tokens", @@ -179,6 +188,9 @@ def test_token_with_optional_oauth_params(rest_mock: Mocker) -> None: assert TEST_RESOURCE in mock_request.last_request.text +@pytest.mark.filterwarnings( + "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" +) def test_token_with_optional_oauth_params_as_empty(rest_mock: Mocker) -> None: mock_request = rest_mock.post( f"{TEST_URI}v1/oauth/tokens", @@ -199,6 +211,9 @@ def test_token_with_optional_oauth_params_as_empty(rest_mock: Mocker) -> None: assert TEST_RESOURCE not in mock_request.last_request.text +@pytest.mark.filterwarnings( + "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" +) def test_token_with_default_scope(rest_mock: Mocker) -> None: mock_request = rest_mock.post( f"{TEST_URI}v1/oauth/tokens", @@ -217,6 +232,9 @@ def test_token_with_default_scope(rest_mock: Mocker) -> None: assert "catalog" in mock_request.last_request.text +@pytest.mark.filterwarnings( + "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. 
Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" +) def test_token_with_custom_scope(rest_mock: Mocker) -> None: mock_request = rest_mock.post( f"{TEST_URI}v1/oauth/tokens", @@ -236,6 +254,9 @@ def test_token_with_custom_scope(rest_mock: Mocker) -> None: assert TEST_SCOPE in mock_request.last_request.text +@pytest.mark.filterwarnings( + "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" +) def test_token_200_w_auth_url(rest_mock: Mocker) -> None: rest_mock.post( TEST_AUTH_URL, @@ -258,6 +279,9 @@ def test_token_200_w_auth_url(rest_mock: Mocker) -> None: # pylint: enable=W0212 +@pytest.mark.filterwarnings( + "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" +) def test_config_200(requests_mock: Mocker) -> None: requests_mock.get( f"{TEST_URI}v1/config", @@ -343,6 +367,9 @@ def test_config_sets_headers(requests_mock: Mocker) -> None: ) +@pytest.mark.filterwarnings( + "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" +) def test_token_400(rest_mock: Mocker) -> None: rest_mock.post( f"{TEST_URI}v1/oauth/tokens", @@ -356,6 +383,9 @@ def test_token_400(rest_mock: Mocker) -> None: assert str(e.value) == "invalid_client: Credentials for key invalid_key do not match" +@pytest.mark.filterwarnings( + "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" +) def test_token_401(rest_mock: Mocker) -> None: message = "invalid_client" rest_mock.post( @@ -489,6 +519,9 @@ def test_list_namespace_with_parent_200(rest_mock: Mocker) -> None: ] +@pytest.mark.filterwarnings( + "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. 
Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" +) def test_list_namespaces_token_expired(rest_mock: Mocker) -> None: new_token = "new_jwt_token" new_header = dict(TEST_HEADERS) diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py index 085150edec..9d7a3ac094 100644 --- a/tests/expressions/test_parser.py +++ b/tests/expressions/test_parser.py @@ -70,7 +70,6 @@ def test_equals_false() -> None: def test_is_null() -> None: assert IsNull("foo") == parser.parse("foo is null") assert IsNull("foo") == parser.parse("foo IS NULL") - assert IsNull("foo") == parser.parse("table.foo IS NULL") def test_not_null() -> None: From 59fffe30204185f8f3981f2dd51047f540eaa6ef Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sun, 5 Jan 2025 18:32:23 -0500 Subject: [PATCH 11/32] [infra] replace `pycln` with `ruff` (#1485) * pre-commit autoupdate * run ruff linter and formatter * remove pycln * ignore some rules * make lint * poetry add ruff --dev * remove ruff from dev dep * git checkout apache/main poetry.lock * add back --exit-non-zero-on-fix --- .pre-commit-config.yaml | 15 +- pyiceberg/cli/output.py | 12 +- pyiceberg/expressions/visitors.py | 16 +- pyiceberg/io/pyarrow.py | 44 +- pyiceberg/manifest.py | 56 ++- pyiceberg/schema.py | 14 +- pyiceberg/table/__init__.py | 32 +- pyiceberg/table/inspect.py | 480 ++++++++++--------- ruff.toml | 2 +- tests/avro/test_resolver.py | 50 +- tests/avro/test_writer.py | 40 +- tests/catalog/test_rest.py | 48 +- tests/catalog/test_sql.py | 34 +- tests/conftest.py | 296 ++++++------ tests/expressions/test_evaluator.py | 30 +- tests/expressions/test_visitors.py | 480 +++++++++---------- tests/integration/test_add_files.py | 104 ++-- tests/integration/test_deletes.py | 16 +- tests/integration/test_reads.py | 28 +- tests/integration/test_rest_schema.py | 20 +- tests/integration/test_writes/test_writes.py | 180 ++++--- tests/io/test_pyarrow.py | 122 +++-- tests/io/test_pyarrow_visitor.py | 352 +++++++------- tests/table/test_init.py | 114 ++--- tests/table/test_name_mapping.py | 244 +++++----- tests/test_schema.py | 24 +- tests/utils/test_manifest.py | 6 +- 27 files changed, 1535 insertions(+), 1324 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bdd1f362b5..e3dc04bde3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,26 +28,19 @@ repos: - id: check-yaml - id: check-ast - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version (Used for linting) - rev: v0.7.4 + rev: v0.8.6 hooks: - id: ruff - args: [ --fix, --exit-non-zero-on-fix, --preview ] + args: [ --fix, --exit-non-zero-on-fix ] - id: ruff-format - args: [ --preview ] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.8.0 + rev: v1.14.1 hooks: - id: mypy args: [--install-types, --non-interactive, --config=pyproject.toml] - - repo: https://github.com/hadialqattan/pycln - rev: v2.4.0 - hooks: - - id: pycln - args: [--config=pyproject.toml] - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.42.0 + rev: v0.43.0 hooks: - id: markdownlint args: ["--fix"] diff --git a/pyiceberg/cli/output.py b/pyiceberg/cli/output.py index a4183c32bd..0eb85841bf 100644 --- a/pyiceberg/cli/output.py +++ b/pyiceberg/cli/output.py @@ -242,8 +242,10 @@ def version(self, version: str) -> None: self._out({"version": version}) def describe_refs(self, refs: List[Tuple[str, SnapshotRefType, Dict[str, str]]]) -> None: - self._out([ - {"name": name, "type": type, detail_key: detail_val} - for name, type, detail in 
refs - for detail_key, detail_val in detail.items() - ]) + self._out( + [ + {"name": name, "type": type, detail_key: detail_val} + for name, type, detail in refs + for detail_key, detail_val in detail.items() + ] + ) diff --git a/pyiceberg/expressions/visitors.py b/pyiceberg/expressions/visitors.py index 26698921b5..768878b068 100644 --- a/pyiceberg/expressions/visitors.py +++ b/pyiceberg/expressions/visitors.py @@ -1228,7 +1228,7 @@ def visit_less_than(self, term: BoundTerm[L], literal: Literal[L]) -> bool: # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more. return ROWS_MIGHT_MATCH - if lower_bound >= literal.value: + if lower_bound >= literal.value: # type: ignore[operator] return ROWS_CANNOT_MATCH return ROWS_MIGHT_MATCH @@ -1249,7 +1249,7 @@ def visit_less_than_or_equal(self, term: BoundTerm[L], literal: Literal[L]) -> b # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more. return ROWS_MIGHT_MATCH - if lower_bound > literal.value: + if lower_bound > literal.value: # type: ignore[operator] return ROWS_CANNOT_MATCH return ROWS_MIGHT_MATCH @@ -1266,7 +1266,7 @@ def visit_greater_than(self, term: BoundTerm[L], literal: Literal[L]) -> bool: if upper_bound_bytes := self.upper_bounds.get(field_id): upper_bound = from_bytes(field.field_type, upper_bound_bytes) - if upper_bound <= literal.value: + if upper_bound <= literal.value: # type: ignore[operator] if self._is_nan(upper_bound): # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more. return ROWS_MIGHT_MATCH @@ -1287,7 +1287,7 @@ def visit_greater_than_or_equal(self, term: BoundTerm[L], literal: Literal[L]) - if upper_bound_bytes := self.upper_bounds.get(field_id): upper_bound = from_bytes(field.field_type, upper_bound_bytes) - if upper_bound < literal.value: + if upper_bound < literal.value: # type: ignore[operator] if self._is_nan(upper_bound): # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more. return ROWS_MIGHT_MATCH @@ -1312,7 +1312,7 @@ def visit_equal(self, term: BoundTerm[L], literal: Literal[L]) -> bool: # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more. return ROWS_MIGHT_MATCH - if lower_bound > literal.value: + if lower_bound > literal.value: # type: ignore[operator] return ROWS_CANNOT_MATCH if upper_bound_bytes := self.upper_bounds.get(field_id): @@ -1321,7 +1321,7 @@ def visit_equal(self, term: BoundTerm[L], literal: Literal[L]) -> bool: # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more. return ROWS_MIGHT_MATCH - if upper_bound < literal.value: + if upper_bound < literal.value: # type: ignore[operator] return ROWS_CANNOT_MATCH return ROWS_MIGHT_MATCH @@ -1349,7 +1349,7 @@ def visit_in(self, term: BoundTerm[L], literals: Set[L]) -> bool: # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more. 
return ROWS_MIGHT_MATCH - literals = {lit for lit in literals if lower_bound <= lit} + literals = {lit for lit in literals if lower_bound <= lit} # type: ignore[operator] if len(literals) == 0: return ROWS_CANNOT_MATCH @@ -1359,7 +1359,7 @@ def visit_in(self, term: BoundTerm[L], literals: Set[L]) -> bool: if self._is_nan(upper_bound): return ROWS_MIGHT_MATCH - literals = {lit for lit in literals if upper_bound >= lit} + literals = {lit for lit in literals if upper_bound >= lit} # type: ignore[operator] if len(literals) == 0: return ROWS_CANNOT_MATCH diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index e8c9f64d63..dc41a7d6a1 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -2449,27 +2449,31 @@ def _dataframe_to_data_files( yield from write_file( io=io, table_metadata=table_metadata, - tasks=iter([ - WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=task_schema) - for batches in bin_pack_arrow_table(df, target_file_size) - ]), + tasks=iter( + [ + WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=task_schema) + for batches in bin_pack_arrow_table(df, target_file_size) + ] + ), ) else: partitions = _determine_partitions(spec=table_metadata.spec(), schema=table_metadata.schema(), arrow_table=df) yield from write_file( io=io, table_metadata=table_metadata, - tasks=iter([ - WriteTask( - write_uuid=write_uuid, - task_id=next(counter), - record_batches=batches, - partition_key=partition.partition_key, - schema=task_schema, - ) - for partition in partitions - for batches in bin_pack_arrow_table(partition.arrow_table_partition, target_file_size) - ]), + tasks=iter( + [ + WriteTask( + write_uuid=write_uuid, + task_id=next(counter), + record_batches=batches, + partition_key=partition.partition_key, + schema=task_schema, + ) + for partition in partitions + for batches in bin_pack_arrow_table(partition.arrow_table_partition, target_file_size) + ] + ), ) @@ -2534,10 +2538,12 @@ def _determine_partitions(spec: PartitionSpec, schema: Schema, arrow_table: pa.T partition_columns: List[Tuple[PartitionField, NestedField]] = [ (partition_field, schema.find_field(partition_field.source_id)) for partition_field in spec.fields ] - partition_values_table = pa.table({ - str(partition.field_id): partition.transform.pyarrow_transform(field.field_type)(arrow_table[field.name]) - for partition, field in partition_columns - }) + partition_values_table = pa.table( + { + str(partition.field_id): partition.transform.pyarrow_transform(field.field_type)(arrow_table[field.name]) + for partition, field in partition_columns + } + ) # Sort by partitions sort_indices = pa.compute.sort_indices( diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index a56da5fc05..5a32a6330c 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -292,28 +292,32 @@ def __repr__(self) -> str: def data_file_with_partition(partition_type: StructType, format_version: TableVersion) -> StructType: - data_file_partition_type = StructType(*[ - NestedField( - field_id=field.field_id, - name=field.name, - field_type=field.field_type, - required=field.required, - ) - for field in partition_type.fields - ]) + data_file_partition_type = StructType( + *[ + NestedField( + field_id=field.field_id, + name=field.name, + field_type=field.field_type, + required=field.required, + ) + for field in partition_type.fields + ] + ) - return StructType(*[ - NestedField( - field_id=102, - name="partition", - field_type=data_file_partition_type, - 
required=True, - doc="Partition data tuple, schema based on the partition spec", - ) - if field.field_id == 102 - else field - for field in DATA_FILE_TYPE[format_version].fields - ]) + return StructType( + *[ + NestedField( + field_id=102, + name="partition", + field_type=data_file_partition_type, + required=True, + doc="Partition data tuple, schema based on the partition spec", + ) + if field.field_id == 102 + else field + for field in DATA_FILE_TYPE[format_version].fields + ] + ) class DataFile(Record): @@ -398,10 +402,12 @@ def __eq__(self, other: Any) -> bool: def manifest_entry_schema_with_data_file(format_version: TableVersion, data_file: StructType) -> Schema: - return Schema(*[ - NestedField(2, "data_file", data_file, required=True) if field.field_id == 2 else field - for field in MANIFEST_ENTRY_SCHEMAS[format_version].fields - ]) + return Schema( + *[ + NestedField(2, "data_file", data_file, required=True) if field.field_id == 2 else field + for field in MANIFEST_ENTRY_SCHEMAS[format_version].fields + ] + ) class ManifestEntry(Record): diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py index cfe3fe3a7b..5a373cb15f 100644 --- a/pyiceberg/schema.py +++ b/pyiceberg/schema.py @@ -1707,12 +1707,14 @@ def list(self, list_type: ListType, element_result: Callable[[], bool]) -> bool: return self._is_field_compatible(list_type.element_field) and element_result() def map(self, map_type: MapType, key_result: Callable[[], bool], value_result: Callable[[], bool]) -> bool: - return all([ - self._is_field_compatible(map_type.key_field), - self._is_field_compatible(map_type.value_field), - key_result(), - value_result(), - ]) + return all( + [ + self._is_field_compatible(map_type.key_field), + self._is_field_compatible(map_type.value_field), + key_result(), + value_result(), + ] + ) def primitive(self, primitive: PrimitiveType) -> bool: return True diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 2469a9ed7b..7bc3fe838b 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -629,18 +629,20 @@ def delete( if len(filtered_df) == 0: replaced_files.append((original_file.file, [])) elif len(df) != len(filtered_df): - replaced_files.append(( - original_file.file, - list( - _dataframe_to_data_files( - io=self._table.io, - df=filtered_df, - table_metadata=self.table_metadata, - write_uuid=commit_uuid, - counter=counter, - ) - ), - )) + replaced_files.append( + ( + original_file.file, + list( + _dataframe_to_data_files( + io=self._table.io, + df=filtered_df, + table_metadata=self.table_metadata, + write_uuid=commit_uuid, + counter=counter, + ) + ), + ) + ) if len(replaced_files) > 0: with self.update_snapshot(snapshot_properties=snapshot_properties).overwrite() as overwrite_snapshot: @@ -680,9 +682,9 @@ def add_files( raise ValueError(f"Cannot add files that are already referenced by table, files: {', '.join(referenced_files)}") if self.table_metadata.name_mapping() is None: - self.set_properties(**{ - TableProperties.DEFAULT_NAME_MAPPING: self.table_metadata.schema().name_mapping.model_dump_json() - }) + self.set_properties( + **{TableProperties.DEFAULT_NAME_MAPPING: self.table_metadata.schema().name_mapping.model_dump_json()} + ) with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot: data_files = _parquet_files_to_data_files( table_metadata=self.table_metadata, file_paths=file_paths, io=self._table.io diff --git a/pyiceberg/table/inspect.py b/pyiceberg/table/inspect.py index beee426533..71d38a2279 100644 
--- a/pyiceberg/table/inspect.py +++ b/pyiceberg/table/inspect.py @@ -58,14 +58,16 @@ def _get_snapshot(self, snapshot_id: Optional[int] = None) -> Snapshot: def snapshots(self) -> "pa.Table": import pyarrow as pa - snapshots_schema = pa.schema([ - pa.field("committed_at", pa.timestamp(unit="ms"), nullable=False), - pa.field("snapshot_id", pa.int64(), nullable=False), - pa.field("parent_id", pa.int64(), nullable=True), - pa.field("operation", pa.string(), nullable=True), - pa.field("manifest_list", pa.string(), nullable=False), - pa.field("summary", pa.map_(pa.string(), pa.string()), nullable=True), - ]) + snapshots_schema = pa.schema( + [ + pa.field("committed_at", pa.timestamp(unit="ms"), nullable=False), + pa.field("snapshot_id", pa.int64(), nullable=False), + pa.field("parent_id", pa.int64(), nullable=True), + pa.field("operation", pa.string(), nullable=True), + pa.field("manifest_list", pa.string(), nullable=False), + pa.field("summary", pa.map_(pa.string(), pa.string()), nullable=True), + ] + ) snapshots = [] for snapshot in self.tbl.metadata.snapshots: if summary := snapshot.summary: @@ -75,14 +77,16 @@ def snapshots(self) -> "pa.Table": operation = None additional_properties = None - snapshots.append({ - "committed_at": datetime.fromtimestamp(snapshot.timestamp_ms / 1000.0, tz=timezone.utc), - "snapshot_id": snapshot.snapshot_id, - "parent_id": snapshot.parent_snapshot_id, - "operation": str(operation), - "manifest_list": snapshot.manifest_list, - "summary": additional_properties, - }) + snapshots.append( + { + "committed_at": datetime.fromtimestamp(snapshot.timestamp_ms / 1000.0, tz=timezone.utc), + "snapshot_id": snapshot.snapshot_id, + "parent_id": snapshot.parent_snapshot_id, + "operation": str(operation), + "manifest_list": snapshot.manifest_list, + "summary": additional_properties, + } + ) return pa.Table.from_pylist( snapshots, @@ -100,14 +104,16 @@ def entries(self, snapshot_id: Optional[int] = None) -> "pa.Table": def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: pa_bound_type = schema_to_pyarrow(bound_type) - return pa.struct([ - pa.field("column_size", pa.int64(), nullable=True), - pa.field("value_count", pa.int64(), nullable=True), - pa.field("null_value_count", pa.int64(), nullable=True), - pa.field("nan_value_count", pa.int64(), nullable=True), - pa.field("lower_bound", pa_bound_type, nullable=True), - pa.field("upper_bound", pa_bound_type, nullable=True), - ]) + return pa.struct( + [ + pa.field("column_size", pa.int64(), nullable=True), + pa.field("value_count", pa.int64(), nullable=True), + pa.field("null_value_count", pa.int64(), nullable=True), + pa.field("nan_value_count", pa.int64(), nullable=True), + pa.field("lower_bound", pa_bound_type, nullable=True), + pa.field("upper_bound", pa_bound_type, nullable=True), + ] + ) for field in self.tbl.metadata.schema().fields: readable_metrics_struct.append( @@ -117,35 +123,39 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: partition_record = self.tbl.metadata.specs_struct() pa_record_struct = schema_to_pyarrow(partition_record) - entries_schema = pa.schema([ - pa.field("status", pa.int8(), nullable=False), - pa.field("snapshot_id", pa.int64(), nullable=False), - pa.field("sequence_number", pa.int64(), nullable=False), - pa.field("file_sequence_number", pa.int64(), nullable=False), - pa.field( - "data_file", - pa.struct([ - pa.field("content", pa.int8(), nullable=False), - pa.field("file_path", pa.string(), nullable=False), - pa.field("file_format", pa.string(), 
nullable=False), - pa.field("partition", pa_record_struct, nullable=False), - pa.field("record_count", pa.int64(), nullable=False), - pa.field("file_size_in_bytes", pa.int64(), nullable=False), - pa.field("column_sizes", pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field("value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field("null_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field("nan_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field("lower_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True), - pa.field("upper_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True), - pa.field("key_metadata", pa.binary(), nullable=True), - pa.field("split_offsets", pa.list_(pa.int64()), nullable=True), - pa.field("equality_ids", pa.list_(pa.int32()), nullable=True), - pa.field("sort_order_id", pa.int32(), nullable=True), - ]), - nullable=False, - ), - pa.field("readable_metrics", pa.struct(readable_metrics_struct), nullable=True), - ]) + entries_schema = pa.schema( + [ + pa.field("status", pa.int8(), nullable=False), + pa.field("snapshot_id", pa.int64(), nullable=False), + pa.field("sequence_number", pa.int64(), nullable=False), + pa.field("file_sequence_number", pa.int64(), nullable=False), + pa.field( + "data_file", + pa.struct( + [ + pa.field("content", pa.int8(), nullable=False), + pa.field("file_path", pa.string(), nullable=False), + pa.field("file_format", pa.string(), nullable=False), + pa.field("partition", pa_record_struct, nullable=False), + pa.field("record_count", pa.int64(), nullable=False), + pa.field("file_size_in_bytes", pa.int64(), nullable=False), + pa.field("column_sizes", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("null_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("nan_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("lower_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True), + pa.field("upper_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True), + pa.field("key_metadata", pa.binary(), nullable=True), + pa.field("split_offsets", pa.list_(pa.int64()), nullable=True), + pa.field("equality_ids", pa.list_(pa.int32()), nullable=True), + pa.field("sort_order_id", pa.int32(), nullable=True), + ] + ), + nullable=False, + ), + pa.field("readable_metrics", pa.struct(readable_metrics_struct), nullable=True), + ] + ) entries = [] snapshot = self._get_snapshot(snapshot_id) @@ -180,32 +190,34 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: for pos, field in enumerate(self.tbl.metadata.specs()[manifest.partition_spec_id].fields) } - entries.append({ - "status": entry.status.value, - "snapshot_id": entry.snapshot_id, - "sequence_number": entry.sequence_number, - "file_sequence_number": entry.file_sequence_number, - "data_file": { - "content": entry.data_file.content, - "file_path": entry.data_file.file_path, - "file_format": entry.data_file.file_format, - "partition": partition_record_dict, - "record_count": entry.data_file.record_count, - "file_size_in_bytes": entry.data_file.file_size_in_bytes, - "column_sizes": dict(entry.data_file.column_sizes), - "value_counts": dict(entry.data_file.value_counts), - "null_value_counts": dict(entry.data_file.null_value_counts), - "nan_value_counts": entry.data_file.nan_value_counts, - "lower_bounds": entry.data_file.lower_bounds, - "upper_bounds": entry.data_file.upper_bounds, - 
"key_metadata": entry.data_file.key_metadata, - "split_offsets": entry.data_file.split_offsets, - "equality_ids": entry.data_file.equality_ids, - "sort_order_id": entry.data_file.sort_order_id, - "spec_id": entry.data_file.spec_id, - }, - "readable_metrics": readable_metrics, - }) + entries.append( + { + "status": entry.status.value, + "snapshot_id": entry.snapshot_id, + "sequence_number": entry.sequence_number, + "file_sequence_number": entry.file_sequence_number, + "data_file": { + "content": entry.data_file.content, + "file_path": entry.data_file.file_path, + "file_format": entry.data_file.file_format, + "partition": partition_record_dict, + "record_count": entry.data_file.record_count, + "file_size_in_bytes": entry.data_file.file_size_in_bytes, + "column_sizes": dict(entry.data_file.column_sizes), + "value_counts": dict(entry.data_file.value_counts), + "null_value_counts": dict(entry.data_file.null_value_counts), + "nan_value_counts": entry.data_file.nan_value_counts, + "lower_bounds": entry.data_file.lower_bounds, + "upper_bounds": entry.data_file.upper_bounds, + "key_metadata": entry.data_file.key_metadata, + "split_offsets": entry.data_file.split_offsets, + "equality_ids": entry.data_file.equality_ids, + "sort_order_id": entry.data_file.sort_order_id, + "spec_id": entry.data_file.spec_id, + }, + "readable_metrics": readable_metrics, + } + ) return pa.Table.from_pylist( entries, @@ -215,26 +227,30 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: def refs(self) -> "pa.Table": import pyarrow as pa - ref_schema = pa.schema([ - pa.field("name", pa.string(), nullable=False), - pa.field("type", pa.dictionary(pa.int32(), pa.string()), nullable=False), - pa.field("snapshot_id", pa.int64(), nullable=False), - pa.field("max_reference_age_in_ms", pa.int64(), nullable=True), - pa.field("min_snapshots_to_keep", pa.int32(), nullable=True), - pa.field("max_snapshot_age_in_ms", pa.int64(), nullable=True), - ]) + ref_schema = pa.schema( + [ + pa.field("name", pa.string(), nullable=False), + pa.field("type", pa.dictionary(pa.int32(), pa.string()), nullable=False), + pa.field("snapshot_id", pa.int64(), nullable=False), + pa.field("max_reference_age_in_ms", pa.int64(), nullable=True), + pa.field("min_snapshots_to_keep", pa.int32(), nullable=True), + pa.field("max_snapshot_age_in_ms", pa.int64(), nullable=True), + ] + ) ref_results = [] for ref in self.tbl.metadata.refs: if snapshot_ref := self.tbl.metadata.refs.get(ref): - ref_results.append({ - "name": ref, - "type": snapshot_ref.snapshot_ref_type.upper(), - "snapshot_id": snapshot_ref.snapshot_id, - "max_reference_age_in_ms": snapshot_ref.max_ref_age_ms, - "min_snapshots_to_keep": snapshot_ref.min_snapshots_to_keep, - "max_snapshot_age_in_ms": snapshot_ref.max_snapshot_age_ms, - }) + ref_results.append( + { + "name": ref, + "type": snapshot_ref.snapshot_ref_type.upper(), + "snapshot_id": snapshot_ref.snapshot_id, + "max_reference_age_in_ms": snapshot_ref.max_ref_age_ms, + "min_snapshots_to_keep": snapshot_ref.min_snapshots_to_keep, + "max_snapshot_age_in_ms": snapshot_ref.max_snapshot_age_ms, + } + ) return pa.Table.from_pylist(ref_results, schema=ref_schema) @@ -243,27 +259,31 @@ def partitions(self, snapshot_id: Optional[int] = None) -> "pa.Table": from pyiceberg.io.pyarrow import schema_to_pyarrow - table_schema = pa.schema([ - pa.field("record_count", pa.int64(), nullable=False), - pa.field("file_count", pa.int32(), nullable=False), - pa.field("total_data_file_size_in_bytes", pa.int64(), nullable=False), - 
pa.field("position_delete_record_count", pa.int64(), nullable=False), - pa.field("position_delete_file_count", pa.int32(), nullable=False), - pa.field("equality_delete_record_count", pa.int64(), nullable=False), - pa.field("equality_delete_file_count", pa.int32(), nullable=False), - pa.field("last_updated_at", pa.timestamp(unit="ms"), nullable=True), - pa.field("last_updated_snapshot_id", pa.int64(), nullable=True), - ]) + table_schema = pa.schema( + [ + pa.field("record_count", pa.int64(), nullable=False), + pa.field("file_count", pa.int32(), nullable=False), + pa.field("total_data_file_size_in_bytes", pa.int64(), nullable=False), + pa.field("position_delete_record_count", pa.int64(), nullable=False), + pa.field("position_delete_file_count", pa.int32(), nullable=False), + pa.field("equality_delete_record_count", pa.int64(), nullable=False), + pa.field("equality_delete_file_count", pa.int32(), nullable=False), + pa.field("last_updated_at", pa.timestamp(unit="ms"), nullable=True), + pa.field("last_updated_snapshot_id", pa.int64(), nullable=True), + ] + ) partition_record = self.tbl.metadata.specs_struct() has_partitions = len(partition_record.fields) > 0 if has_partitions: pa_record_struct = schema_to_pyarrow(partition_record) - partitions_schema = pa.schema([ - pa.field("partition", pa_record_struct, nullable=False), - pa.field("spec_id", pa.int32(), nullable=False), - ]) + partitions_schema = pa.schema( + [ + pa.field("partition", pa_record_struct, nullable=False), + pa.field("spec_id", pa.int32(), nullable=False), + ] + ) table_schema = pa.unify_schemas([partitions_schema, table_schema]) @@ -329,27 +349,31 @@ def update_partitions_map( def manifests(self) -> "pa.Table": import pyarrow as pa - partition_summary_schema = pa.struct([ - pa.field("contains_null", pa.bool_(), nullable=False), - pa.field("contains_nan", pa.bool_(), nullable=True), - pa.field("lower_bound", pa.string(), nullable=True), - pa.field("upper_bound", pa.string(), nullable=True), - ]) - - manifest_schema = pa.schema([ - pa.field("content", pa.int8(), nullable=False), - pa.field("path", pa.string(), nullable=False), - pa.field("length", pa.int64(), nullable=False), - pa.field("partition_spec_id", pa.int32(), nullable=False), - pa.field("added_snapshot_id", pa.int64(), nullable=False), - pa.field("added_data_files_count", pa.int32(), nullable=False), - pa.field("existing_data_files_count", pa.int32(), nullable=False), - pa.field("deleted_data_files_count", pa.int32(), nullable=False), - pa.field("added_delete_files_count", pa.int32(), nullable=False), - pa.field("existing_delete_files_count", pa.int32(), nullable=False), - pa.field("deleted_delete_files_count", pa.int32(), nullable=False), - pa.field("partition_summaries", pa.list_(partition_summary_schema), nullable=False), - ]) + partition_summary_schema = pa.struct( + [ + pa.field("contains_null", pa.bool_(), nullable=False), + pa.field("contains_nan", pa.bool_(), nullable=True), + pa.field("lower_bound", pa.string(), nullable=True), + pa.field("upper_bound", pa.string(), nullable=True), + ] + ) + + manifest_schema = pa.schema( + [ + pa.field("content", pa.int8(), nullable=False), + pa.field("path", pa.string(), nullable=False), + pa.field("length", pa.int64(), nullable=False), + pa.field("partition_spec_id", pa.int32(), nullable=False), + pa.field("added_snapshot_id", pa.int64(), nullable=False), + pa.field("added_data_files_count", pa.int32(), nullable=False), + pa.field("existing_data_files_count", pa.int32(), nullable=False), + pa.field("deleted_data_files_count", 
pa.int32(), nullable=False), + pa.field("added_delete_files_count", pa.int32(), nullable=False), + pa.field("existing_delete_files_count", pa.int32(), nullable=False), + pa.field("deleted_delete_files_count", pa.int32(), nullable=False), + pa.field("partition_summaries", pa.list_(partition_summary_schema), nullable=False), + ] + ) def _partition_summaries_to_rows( spec: PartitionSpec, partition_summaries: List[PartitionFieldSummary] @@ -376,12 +400,14 @@ def _partition_summaries_to_rows( if field_summary.upper_bound else None ) - rows.append({ - "contains_null": field_summary.contains_null, - "contains_nan": field_summary.contains_nan, - "lower_bound": lower_bound, - "upper_bound": upper_bound, - }) + rows.append( + { + "contains_null": field_summary.contains_null, + "contains_nan": field_summary.contains_nan, + "lower_bound": lower_bound, + "upper_bound": upper_bound, + } + ) return rows specs = self.tbl.metadata.specs() @@ -390,22 +416,26 @@ def _partition_summaries_to_rows( for manifest in snapshot.manifests(self.tbl.io): is_data_file = manifest.content == ManifestContent.DATA is_delete_file = manifest.content == ManifestContent.DELETES - manifests.append({ - "content": manifest.content, - "path": manifest.manifest_path, - "length": manifest.manifest_length, - "partition_spec_id": manifest.partition_spec_id, - "added_snapshot_id": manifest.added_snapshot_id, - "added_data_files_count": manifest.added_files_count if is_data_file else 0, - "existing_data_files_count": manifest.existing_files_count if is_data_file else 0, - "deleted_data_files_count": manifest.deleted_files_count if is_data_file else 0, - "added_delete_files_count": manifest.added_files_count if is_delete_file else 0, - "existing_delete_files_count": manifest.existing_files_count if is_delete_file else 0, - "deleted_delete_files_count": manifest.deleted_files_count if is_delete_file else 0, - "partition_summaries": _partition_summaries_to_rows(specs[manifest.partition_spec_id], manifest.partitions) - if manifest.partitions - else [], - }) + manifests.append( + { + "content": manifest.content, + "path": manifest.manifest_path, + "length": manifest.manifest_length, + "partition_spec_id": manifest.partition_spec_id, + "added_snapshot_id": manifest.added_snapshot_id, + "added_data_files_count": manifest.added_files_count if is_data_file else 0, + "existing_data_files_count": manifest.existing_files_count if is_data_file else 0, + "deleted_data_files_count": manifest.deleted_files_count if is_data_file else 0, + "added_delete_files_count": manifest.added_files_count if is_delete_file else 0, + "existing_delete_files_count": manifest.existing_files_count if is_delete_file else 0, + "deleted_delete_files_count": manifest.deleted_files_count if is_delete_file else 0, + "partition_summaries": _partition_summaries_to_rows( + specs[manifest.partition_spec_id], manifest.partitions + ) + if manifest.partitions + else [], + } + ) return pa.Table.from_pylist( manifests, @@ -417,13 +447,15 @@ def metadata_log_entries(self) -> "pa.Table": from pyiceberg.table.snapshots import MetadataLogEntry - table_schema = pa.schema([ - pa.field("timestamp", pa.timestamp(unit="ms"), nullable=False), - pa.field("file", pa.string(), nullable=False), - pa.field("latest_snapshot_id", pa.int64(), nullable=True), - pa.field("latest_schema_id", pa.int32(), nullable=True), - pa.field("latest_sequence_number", pa.int64(), nullable=True), - ]) + table_schema = pa.schema( + [ + pa.field("timestamp", pa.timestamp(unit="ms"), nullable=False), + pa.field("file", 
pa.string(), nullable=False), + pa.field("latest_snapshot_id", pa.int64(), nullable=True), + pa.field("latest_schema_id", pa.int32(), nullable=True), + pa.field("latest_sequence_number", pa.int64(), nullable=True), + ] + ) def metadata_log_entry_to_row(metadata_entry: MetadataLogEntry) -> Dict[str, Any]: latest_snapshot = self.tbl.snapshot_as_of_timestamp(metadata_entry.timestamp_ms) @@ -449,12 +481,14 @@ def metadata_log_entry_to_row(metadata_entry: MetadataLogEntry) -> Dict[str, Any def history(self) -> "pa.Table": import pyarrow as pa - history_schema = pa.schema([ - pa.field("made_current_at", pa.timestamp(unit="ms"), nullable=False), - pa.field("snapshot_id", pa.int64(), nullable=False), - pa.field("parent_id", pa.int64(), nullable=True), - pa.field("is_current_ancestor", pa.bool_(), nullable=False), - ]) + history_schema = pa.schema( + [ + pa.field("made_current_at", pa.timestamp(unit="ms"), nullable=False), + pa.field("snapshot_id", pa.int64(), nullable=False), + pa.field("parent_id", pa.int64(), nullable=True), + pa.field("is_current_ancestor", pa.bool_(), nullable=False), + ] + ) ancestors_ids = {snapshot.snapshot_id for snapshot in ancestors_of(self.tbl.current_snapshot(), self.tbl.metadata)} @@ -464,12 +498,14 @@ def history(self) -> "pa.Table": for snapshot_entry in metadata.snapshot_log: snapshot = metadata.snapshot_by_id(snapshot_entry.snapshot_id) - history.append({ - "made_current_at": datetime.fromtimestamp(snapshot_entry.timestamp_ms / 1000.0, tz=timezone.utc), - "snapshot_id": snapshot_entry.snapshot_id, - "parent_id": snapshot.parent_snapshot_id if snapshot else None, - "is_current_ancestor": snapshot_entry.snapshot_id in ancestors_ids, - }) + history.append( + { + "made_current_at": datetime.fromtimestamp(snapshot_entry.timestamp_ms / 1000.0, tz=timezone.utc), + "snapshot_id": snapshot_entry.snapshot_id, + "parent_id": snapshot.parent_snapshot_id if snapshot else None, + "is_current_ancestor": snapshot_entry.snapshot_id in ancestors_ids, + } + ) return pa.Table.from_pylist(history, schema=history_schema) @@ -483,39 +519,43 @@ def _files(self, snapshot_id: Optional[int] = None, data_file_filter: Optional[S def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: pa_bound_type = schema_to_pyarrow(bound_type) - return pa.struct([ - pa.field("column_size", pa.int64(), nullable=True), - pa.field("value_count", pa.int64(), nullable=True), - pa.field("null_value_count", pa.int64(), nullable=True), - pa.field("nan_value_count", pa.int64(), nullable=True), - pa.field("lower_bound", pa_bound_type, nullable=True), - pa.field("upper_bound", pa_bound_type, nullable=True), - ]) + return pa.struct( + [ + pa.field("column_size", pa.int64(), nullable=True), + pa.field("value_count", pa.int64(), nullable=True), + pa.field("null_value_count", pa.int64(), nullable=True), + pa.field("nan_value_count", pa.int64(), nullable=True), + pa.field("lower_bound", pa_bound_type, nullable=True), + pa.field("upper_bound", pa_bound_type, nullable=True), + ] + ) for field in self.tbl.metadata.schema().fields: readable_metrics_struct.append( pa.field(schema.find_column_name(field.field_id), _readable_metrics_struct(field.field_type), nullable=False) ) - files_schema = pa.schema([ - pa.field("content", pa.int8(), nullable=False), - pa.field("file_path", pa.string(), nullable=False), - pa.field("file_format", pa.dictionary(pa.int32(), pa.string()), nullable=False), - pa.field("spec_id", pa.int32(), nullable=False), - pa.field("record_count", pa.int64(), nullable=False), - 
pa.field("file_size_in_bytes", pa.int64(), nullable=False), - pa.field("column_sizes", pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field("value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field("null_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field("nan_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field("lower_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True), - pa.field("upper_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True), - pa.field("key_metadata", pa.binary(), nullable=True), - pa.field("split_offsets", pa.list_(pa.int64()), nullable=True), - pa.field("equality_ids", pa.list_(pa.int32()), nullable=True), - pa.field("sort_order_id", pa.int32(), nullable=True), - pa.field("readable_metrics", pa.struct(readable_metrics_struct), nullable=True), - ]) + files_schema = pa.schema( + [ + pa.field("content", pa.int8(), nullable=False), + pa.field("file_path", pa.string(), nullable=False), + pa.field("file_format", pa.dictionary(pa.int32(), pa.string()), nullable=False), + pa.field("spec_id", pa.int32(), nullable=False), + pa.field("record_count", pa.int64(), nullable=False), + pa.field("file_size_in_bytes", pa.int64(), nullable=False), + pa.field("column_sizes", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("null_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("nan_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("lower_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True), + pa.field("upper_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True), + pa.field("key_metadata", pa.binary(), nullable=True), + pa.field("split_offsets", pa.list_(pa.int64()), nullable=True), + pa.field("equality_ids", pa.list_(pa.int32()), nullable=True), + pa.field("sort_order_id", pa.int32(), nullable=True), + pa.field("readable_metrics", pa.struct(readable_metrics_struct), nullable=True), + ] + ) files: list[dict[str, Any]] = [] @@ -553,25 +593,29 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: } for field in self.tbl.metadata.schema().fields } - files.append({ - "content": data_file.content, - "file_path": data_file.file_path, - "file_format": data_file.file_format, - "spec_id": data_file.spec_id, - "record_count": data_file.record_count, - "file_size_in_bytes": data_file.file_size_in_bytes, - "column_sizes": dict(data_file.column_sizes) if data_file.column_sizes is not None else None, - "value_counts": dict(data_file.value_counts) if data_file.value_counts is not None else None, - "null_value_counts": dict(data_file.null_value_counts) if data_file.null_value_counts is not None else None, - "nan_value_counts": dict(data_file.nan_value_counts) if data_file.nan_value_counts is not None else None, - "lower_bounds": dict(data_file.lower_bounds) if data_file.lower_bounds is not None else None, - "upper_bounds": dict(data_file.upper_bounds) if data_file.upper_bounds is not None else None, - "key_metadata": data_file.key_metadata, - "split_offsets": data_file.split_offsets, - "equality_ids": data_file.equality_ids, - "sort_order_id": data_file.sort_order_id, - "readable_metrics": readable_metrics, - }) + files.append( + { + "content": data_file.content, + "file_path": data_file.file_path, + "file_format": data_file.file_format, + "spec_id": data_file.spec_id, + "record_count": data_file.record_count, + "file_size_in_bytes": 
data_file.file_size_in_bytes, + "column_sizes": dict(data_file.column_sizes) if data_file.column_sizes is not None else None, + "value_counts": dict(data_file.value_counts) if data_file.value_counts is not None else None, + "null_value_counts": dict(data_file.null_value_counts) + if data_file.null_value_counts is not None + else None, + "nan_value_counts": dict(data_file.nan_value_counts) if data_file.nan_value_counts is not None else None, + "lower_bounds": dict(data_file.lower_bounds) if data_file.lower_bounds is not None else None, + "upper_bounds": dict(data_file.upper_bounds) if data_file.upper_bounds is not None else None, + "key_metadata": data_file.key_metadata, + "split_offsets": data_file.split_offsets, + "equality_ids": data_file.equality_ids, + "sort_order_id": data_file.sort_order_id, + "readable_metrics": readable_metrics, + } + ) return pa.Table.from_pylist( files, diff --git a/ruff.toml b/ruff.toml index caaa108c84..11fd2a957b 100644 --- a/ruff.toml +++ b/ruff.toml @@ -58,7 +58,7 @@ select = [ "I", # isort "UP", # pyupgrade ] -ignore = ["E501","E203","B024","B028","UP037"] +ignore = ["E501","E203","B024","B028","UP037", "UP035", "UP006"] # Allow autofix for all enabled rules (when `--fix`) is provided. fixable = ["ALL"] diff --git a/tests/avro/test_resolver.py b/tests/avro/test_resolver.py index decd9060a4..b5388b5ebb 100644 --- a/tests/avro/test_resolver.py +++ b/tests/avro/test_resolver.py @@ -322,30 +322,34 @@ def test_resolver_initial_value() -> None: def test_resolve_writer() -> None: actual = resolve_writer(record_schema=MANIFEST_ENTRY_SCHEMAS[2], file_schema=MANIFEST_ENTRY_SCHEMAS[1]) - expected = StructWriter(( - (0, IntegerWriter()), - (1, IntegerWriter()), + expected = StructWriter( ( - 4, - StructWriter(( - (1, StringWriter()), - (2, StringWriter()), - (3, StructWriter(())), - (4, IntegerWriter()), - (5, IntegerWriter()), - (None, DefaultWriter(writer=IntegerWriter(), value=67108864)), - (6, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))), - (7, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))), - (8, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))), - (9, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))), - (10, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=BinaryWriter()))), - (11, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=BinaryWriter()))), - (12, OptionWriter(option=BinaryWriter())), - (13, OptionWriter(option=ListWriter(element_writer=IntegerWriter()))), - (15, OptionWriter(option=IntegerWriter())), - )), - ), - )) + (0, IntegerWriter()), + (1, IntegerWriter()), + ( + 4, + StructWriter( + ( + (1, StringWriter()), + (2, StringWriter()), + (3, StructWriter(())), + (4, IntegerWriter()), + (5, IntegerWriter()), + (None, DefaultWriter(writer=IntegerWriter(), value=67108864)), + (6, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))), + (7, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))), + (8, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))), + (9, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))), + (10, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=BinaryWriter()))), + (11, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=BinaryWriter()))), + 
(12, OptionWriter(option=BinaryWriter())), + (13, OptionWriter(option=ListWriter(element_writer=IntegerWriter()))), + (15, OptionWriter(option=IntegerWriter())), + ) + ), + ), + ) + ) assert actual == expected diff --git a/tests/avro/test_writer.py b/tests/avro/test_writer.py index 5a531c7748..39b8ecc393 100644 --- a/tests/avro/test_writer.py +++ b/tests/avro/test_writer.py @@ -178,15 +178,17 @@ class MyStruct(Record): construct_writer(schema).write(encoder, my_struct) - assert output.getbuffer() == b"".join([ - b"\x18", - zigzag_encode(len(my_struct.properties)), - zigzag_encode(1), - zigzag_encode(2), - zigzag_encode(3), - zigzag_encode(4), - b"\x00", - ]) + assert output.getbuffer() == b"".join( + [ + b"\x18", + zigzag_encode(len(my_struct.properties)), + zigzag_encode(1), + zigzag_encode(2), + zigzag_encode(3), + zigzag_encode(4), + b"\x00", + ] + ) def test_write_struct_with_list() -> None: @@ -206,15 +208,17 @@ class MyStruct(Record): construct_writer(schema).write(encoder, my_struct) - assert output.getbuffer() == b"".join([ - b"\x18", - zigzag_encode(len(my_struct.properties)), - zigzag_encode(1), - zigzag_encode(2), - zigzag_encode(3), - zigzag_encode(4), - b"\x00", - ]) + assert output.getbuffer() == b"".join( + [ + b"\x18", + zigzag_encode(len(my_struct.properties)), + zigzag_encode(1), + zigzag_encode(2), + zigzag_encode(3), + zigzag_encode(4), + b"\x00", + ] + ) def test_write_decimal() -> None: diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 2a4b3a7a1f..21aa9677bd 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -323,19 +323,19 @@ def test_properties_sets_headers(requests_mock: Mocker) -> None: **{"header.Content-Type": "application/vnd.api+json", "header.Customized-Header": "some/value"}, ) - assert catalog._session.headers.get("Content-type") == "application/json", ( - "Expected 'Content-Type' default header not to be overwritten" - ) - assert requests_mock.last_request.headers["Content-type"] == "application/json", ( - "Config request did not include expected 'Content-Type' header" - ) + assert ( + catalog._session.headers.get("Content-type") == "application/json" + ), "Expected 'Content-Type' default header not to be overwritten" + assert ( + requests_mock.last_request.headers["Content-type"] == "application/json" + ), "Config request did not include expected 'Content-Type' header" - assert catalog._session.headers.get("Customized-Header") == "some/value", ( - "Expected 'Customized-Header' header to be 'some/value'" - ) - assert requests_mock.last_request.headers["Customized-Header"] == "some/value", ( - "Config request did not include expected 'Customized-Header' header" - ) + assert ( + catalog._session.headers.get("Customized-Header") == "some/value" + ), "Expected 'Customized-Header' header to be 'some/value'" + assert ( + requests_mock.last_request.headers["Customized-Header"] == "some/value" + ), "Config request did not include expected 'Customized-Header' header" def test_config_sets_headers(requests_mock: Mocker) -> None: @@ -352,19 +352,19 @@ def test_config_sets_headers(requests_mock: Mocker) -> None: catalog = RestCatalog("rest", uri=TEST_URI, warehouse="s3://some-bucket") catalog.create_namespace(namespace) - assert catalog._session.headers.get("Content-type") == "application/json", ( - "Expected 'Content-Type' default header not to be overwritten" - ) - assert requests_mock.last_request.headers["Content-type"] == "application/json", ( - "Create namespace request did not include expected 'Content-Type' 
header" - ) + assert ( + catalog._session.headers.get("Content-type") == "application/json" + ), "Expected 'Content-Type' default header not to be overwritten" + assert ( + requests_mock.last_request.headers["Content-type"] == "application/json" + ), "Create namespace request did not include expected 'Content-Type' header" - assert catalog._session.headers.get("Customized-Header") == "some/value", ( - "Expected 'Customized-Header' header to be 'some/value'" - ) - assert requests_mock.last_request.headers["Customized-Header"] == "some/value", ( - "Create namespace request did not include expected 'Customized-Header' header" - ) + assert ( + catalog._session.headers.get("Customized-Header") == "some/value" + ), "Expected 'Customized-Header' header to be 'some/value'" + assert ( + requests_mock.last_request.headers["Customized-Header"] == "some/value" + ), "Create namespace request did not include expected 'Customized-Header' header" @pytest.mark.filterwarnings( diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index 7f72568b41..cffc14d9d7 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -401,12 +401,14 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, table_identifier: Identifier) pa.array([True, None, False, True]), # 'baz' column pa.array([None, "A", "B", "C"]), # 'large' column ], - schema=pa.schema([ - pa.field("foo", pa.large_string(), nullable=True), - pa.field("bar", pa.int32(), nullable=False), - pa.field("baz", pa.bool_(), nullable=True), - pa.field("large", pa.large_string(), nullable=True), - ]), + schema=pa.schema( + [ + pa.field("foo", pa.large_string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + pa.field("large", pa.large_string(), nullable=True), + ] + ), ) namespace = Catalog.namespace_from(table_identifier) catalog.create_namespace(namespace) @@ -1426,10 +1428,12 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None: "foo": ["a", None, "z"], "bar": [19, None, 25], }, - schema=pa.schema([ - pa.field("foo", pa.large_string(), nullable=True), - pa.field("bar", pa.int32(), nullable=True), - ]), + schema=pa.schema( + [ + pa.field("foo", pa.large_string(), nullable=True), + pa.field("bar", pa.int32(), nullable=True), + ] + ), ) with tbl.transaction() as txn: @@ -1474,10 +1478,12 @@ def test_create_table_transaction(catalog: SqlCatalog, format_version: int) -> N "foo": ["a", None, "z"], "bar": [19, None, 25], }, - schema=pa.schema([ - pa.field("foo", pa.large_string(), nullable=True), - pa.field("bar", pa.int32(), nullable=True), - ]), + schema=pa.schema( + [ + pa.field("foo", pa.large_string(), nullable=True), + pa.field("bar", pa.int32(), nullable=True), + ] + ), ) with catalog.create_table_transaction( diff --git a/tests/conftest.py b/tests/conftest.py index 22329b3882..ef980f3818 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -353,49 +353,57 @@ def table_schema_with_all_types() -> Schema: def pyarrow_schema_simple_without_ids() -> "pa.Schema": import pyarrow as pa - return pa.schema([ - pa.field("foo", pa.string(), nullable=True), - pa.field("bar", pa.int32(), nullable=False), - pa.field("baz", pa.bool_(), nullable=True), - ]) + return pa.schema( + [ + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + ] + ) @pytest.fixture(scope="session") def pyarrow_schema_nested_without_ids() -> "pa.Schema": import pyarrow as pa - return pa.schema([ - 
pa.field("foo", pa.string(), nullable=False), - pa.field("bar", pa.int32(), nullable=False), - pa.field("baz", pa.bool_(), nullable=True), - pa.field("qux", pa.list_(pa.string()), nullable=False), - pa.field( - "quux", - pa.map_( - pa.string(), - pa.map_(pa.string(), pa.int32()), + return pa.schema( + [ + pa.field("foo", pa.string(), nullable=False), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + pa.field("qux", pa.list_(pa.string()), nullable=False), + pa.field( + "quux", + pa.map_( + pa.string(), + pa.map_(pa.string(), pa.int32()), + ), + nullable=False, ), - nullable=False, - ), - pa.field( - "location", - pa.list_( - pa.struct([ - pa.field("latitude", pa.float32(), nullable=False), - pa.field("longitude", pa.float32(), nullable=False), - ]), + pa.field( + "location", + pa.list_( + pa.struct( + [ + pa.field("latitude", pa.float32(), nullable=False), + pa.field("longitude", pa.float32(), nullable=False), + ] + ), + ), + nullable=False, ), - nullable=False, - ), - pa.field( - "person", - pa.struct([ - pa.field("name", pa.string(), nullable=True), - pa.field("age", pa.int32(), nullable=False), - ]), - nullable=True, - ), - ]) + pa.field( + "person", + pa.struct( + [ + pa.field("name", pa.string(), nullable=True), + pa.field("age", pa.int32(), nullable=False), + ] + ), + nullable=True, + ), + ] + ) @pytest.fixture(scope="session") @@ -2314,26 +2322,28 @@ def spark() -> "SparkSession": def pa_schema() -> "pa.Schema": import pyarrow as pa - return pa.schema([ - ("bool", pa.bool_()), - ("string", pa.large_string()), - ("string_long", pa.large_string()), - ("int", pa.int32()), - ("long", pa.int64()), - ("float", pa.float32()), - ("double", pa.float64()), - # Not supported by Spark - # ("time", pa.time64('us')), - ("timestamp", pa.timestamp(unit="us")), - ("timestamptz", pa.timestamp(unit="us", tz="UTC")), - ("date", pa.date32()), - # Not supported by Spark - # ("time", pa.time64("us")), - # Not natively supported by Arrow - # ("uuid", pa.fixed(16)), - ("binary", pa.large_binary()), - ("fixed", pa.binary(16)), - ]) + return pa.schema( + [ + ("bool", pa.bool_()), + ("string", pa.large_string()), + ("string_long", pa.large_string()), + ("int", pa.int32()), + ("long", pa.int64()), + ("float", pa.float32()), + ("double", pa.float64()), + # Not supported by Spark + # ("time", pa.time64('us')), + ("timestamp", pa.timestamp(unit="us")), + ("timestamptz", pa.timestamp(unit="us", tz="UTC")), + ("date", pa.date32()), + # Not supported by Spark + # ("time", pa.time64("us")), + # Not natively supported by Arrow + # ("uuid", pa.fixed(16)), + ("binary", pa.large_binary()), + ("fixed", pa.binary(16)), + ] + ) @pytest.fixture(scope="session") @@ -2415,11 +2425,13 @@ def arrow_table_date_timestamps() -> "pa.Table": None, ], }, - schema=pa.schema([ - ("date", pa.date32()), - ("timestamp", pa.timestamp(unit="us")), - ("timestamptz", pa.timestamp(unit="us", tz="UTC")), - ]), + schema=pa.schema( + [ + ("date", pa.date32()), + ("timestamp", pa.timestamp(unit="us")), + ("timestamptz", pa.timestamp(unit="us", tz="UTC")), + ] + ), ) @@ -2438,19 +2450,21 @@ def arrow_table_schema_with_all_timestamp_precisions() -> "pa.Schema": """Pyarrow Schema with all supported timestamp types.""" import pyarrow as pa - return pa.schema([ - ("timestamp_s", pa.timestamp(unit="s")), - ("timestamptz_s", pa.timestamp(unit="s", tz="UTC")), - ("timestamp_ms", pa.timestamp(unit="ms")), - ("timestamptz_ms", pa.timestamp(unit="ms", tz="UTC")), - ("timestamp_us", pa.timestamp(unit="us")), - 
("timestamptz_us", pa.timestamp(unit="us", tz="UTC")), - ("timestamp_ns", pa.timestamp(unit="ns")), - ("timestamptz_ns", pa.timestamp(unit="ns", tz="UTC")), - ("timestamptz_us_etc_utc", pa.timestamp(unit="us", tz="Etc/UTC")), - ("timestamptz_ns_z", pa.timestamp(unit="ns", tz="Z")), - ("timestamptz_s_0000", pa.timestamp(unit="s", tz="+00:00")), - ]) + return pa.schema( + [ + ("timestamp_s", pa.timestamp(unit="s")), + ("timestamptz_s", pa.timestamp(unit="s", tz="UTC")), + ("timestamp_ms", pa.timestamp(unit="ms")), + ("timestamptz_ms", pa.timestamp(unit="ms", tz="UTC")), + ("timestamp_us", pa.timestamp(unit="us")), + ("timestamptz_us", pa.timestamp(unit="us", tz="UTC")), + ("timestamp_ns", pa.timestamp(unit="ns")), + ("timestamptz_ns", pa.timestamp(unit="ns", tz="UTC")), + ("timestamptz_us_etc_utc", pa.timestamp(unit="us", tz="Etc/UTC")), + ("timestamptz_ns_z", pa.timestamp(unit="ns", tz="Z")), + ("timestamptz_s_0000", pa.timestamp(unit="s", tz="+00:00")), + ] + ) @pytest.fixture(scope="session") @@ -2459,51 +2473,53 @@ def arrow_table_with_all_timestamp_precisions(arrow_table_schema_with_all_timest import pandas as pd import pyarrow as pa - test_data = pd.DataFrame({ - "timestamp_s": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)], - "timestamptz_s": [ - datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc), - None, - datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc), - ], - "timestamp_ms": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)], - "timestamptz_ms": [ - datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc), - None, - datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc), - ], - "timestamp_us": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)], - "timestamptz_us": [ - datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc), - None, - datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc), - ], - "timestamp_ns": [ - pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=6), - None, - pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=7), - ], - "timestamptz_ns": [ - datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc), - None, - datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc), - ], - "timestamptz_us_etc_utc": [ - datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc), - None, - datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc), - ], - "timestamptz_ns_z": [ - pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=6, tz="UTC"), - None, - pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=7, tz="UTC"), - ], - "timestamptz_s_0000": [ - datetime(2023, 1, 1, 19, 25, 1, tzinfo=timezone.utc), - None, - datetime(2023, 3, 1, 19, 25, 1, tzinfo=timezone.utc), - ], - }) + test_data = pd.DataFrame( + { + "timestamp_s": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)], + "timestamptz_s": [ + datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc), + None, + datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc), + ], + "timestamp_ms": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)], + "timestamptz_ms": [ + datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc), + None, + datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc), + ], + "timestamp_us": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)], + "timestamptz_us": [ + datetime(2023, 1, 1, 19, 25, 00, 
tzinfo=timezone.utc), + None, + datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc), + ], + "timestamp_ns": [ + pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=6), + None, + pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=7), + ], + "timestamptz_ns": [ + datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc), + None, + datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc), + ], + "timestamptz_us_etc_utc": [ + datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc), + None, + datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc), + ], + "timestamptz_ns_z": [ + pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=6, tz="UTC"), + None, + pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=7, tz="UTC"), + ], + "timestamptz_s_0000": [ + datetime(2023, 1, 1, 19, 25, 1, tzinfo=timezone.utc), + None, + datetime(2023, 3, 1, 19, 25, 1, tzinfo=timezone.utc), + ], + } + ) return pa.Table.from_pandas(test_data, schema=arrow_table_schema_with_all_timestamp_precisions) @@ -2512,19 +2528,21 @@ def arrow_table_schema_with_all_microseconds_timestamp_precisions() -> "pa.Schem """Pyarrow Schema with all microseconds timestamp.""" import pyarrow as pa - return pa.schema([ - ("timestamp_s", pa.timestamp(unit="us")), - ("timestamptz_s", pa.timestamp(unit="us", tz="UTC")), - ("timestamp_ms", pa.timestamp(unit="us")), - ("timestamptz_ms", pa.timestamp(unit="us", tz="UTC")), - ("timestamp_us", pa.timestamp(unit="us")), - ("timestamptz_us", pa.timestamp(unit="us", tz="UTC")), - ("timestamp_ns", pa.timestamp(unit="us")), - ("timestamptz_ns", pa.timestamp(unit="us", tz="UTC")), - ("timestamptz_us_etc_utc", pa.timestamp(unit="us", tz="UTC")), - ("timestamptz_ns_z", pa.timestamp(unit="us", tz="UTC")), - ("timestamptz_s_0000", pa.timestamp(unit="us", tz="UTC")), - ]) + return pa.schema( + [ + ("timestamp_s", pa.timestamp(unit="us")), + ("timestamptz_s", pa.timestamp(unit="us", tz="UTC")), + ("timestamp_ms", pa.timestamp(unit="us")), + ("timestamptz_ms", pa.timestamp(unit="us", tz="UTC")), + ("timestamp_us", pa.timestamp(unit="us")), + ("timestamptz_us", pa.timestamp(unit="us", tz="UTC")), + ("timestamp_ns", pa.timestamp(unit="us")), + ("timestamptz_ns", pa.timestamp(unit="us", tz="UTC")), + ("timestamptz_us_etc_utc", pa.timestamp(unit="us", tz="UTC")), + ("timestamptz_ns_z", pa.timestamp(unit="us", tz="UTC")), + ("timestamptz_s_0000", pa.timestamp(unit="us", tz="UTC")), + ] + ) @pytest.fixture(scope="session") @@ -2578,13 +2596,15 @@ def pyarrow_schema_with_promoted_types() -> "pa.Schema": """Pyarrow Schema with longs, doubles and uuid in simple and nested types.""" import pyarrow as pa - return pa.schema(( - pa.field("long", pa.int32(), nullable=True), # can support upcasting integer to long - pa.field("list", pa.list_(pa.int32()), nullable=False), # can support upcasting integer to long - pa.field("map", pa.map_(pa.string(), pa.int32()), nullable=False), # can support upcasting integer to long - pa.field("double", pa.float32(), nullable=True), # can support upcasting float to double - pa.field("uuid", pa.binary(length=16), nullable=True), # can support upcasting float to double - )) + return pa.schema( + ( + pa.field("long", pa.int32(), nullable=True), # can support upcasting integer to long + pa.field("list", pa.list_(pa.int32()), nullable=False), # can support upcasting integer to long + pa.field("map", pa.map_(pa.string(), 
pa.int32()), nullable=False), # can support upcasting integer to long + pa.field("double", pa.float32(), nullable=True), # can support upcasting float to double + pa.field("uuid", pa.binary(length=16), nullable=True), # can support upcasting float to double + ) + ) @pytest.fixture(scope="session") diff --git a/tests/expressions/test_evaluator.py b/tests/expressions/test_evaluator.py index f8a9a8806d..e2b1f27377 100644 --- a/tests/expressions/test_evaluator.py +++ b/tests/expressions/test_evaluator.py @@ -681,25 +681,25 @@ def data_file_nan() -> DataFile: def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_file_nan: Schema, data_file_nan: DataFile) -> None: for operator in [LessThan, LessThanOrEqual]: - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] assert not should_read, "Should not match: all nan column doesn't contain number" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 1)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] assert not should_read, "Should not match: 1 is smaller than lower bound" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 10)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 10)).eval(data_file_nan) # type: ignore[arg-type] assert should_read, "Should match: 10 is larger than lower bound" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("min_max_nan", 1)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("min_max_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] assert should_read, "Should match: no visibility" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan_null_bounds", 1)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan_null_bounds", 1)).eval(data_file_nan) # type: ignore[arg-type] assert not should_read, "Should not match: all nan column doesn't contain number" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 1)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 1)).eval(data_file_nan) # type: ignore[arg-type] assert not should_read, "Should not match: 1 is smaller than lower bound" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 10)).eval( + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 10)).eval( # type: ignore[arg-type] data_file_nan ) assert should_read, "Should match: 10 larger than lower bound" @@ -709,30 +709,30 @@ def test_inclusive_metrics_evaluator_greater_than_and_greater_than_equal( schema_data_file_nan: Schema, data_file_nan: DataFile ) -> None: for operator in [GreaterThan, GreaterThanOrEqual]: - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] assert not should_read, "Should not match: all nan column doesn't contain number" 
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 1)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] assert should_read, "Should match: upper bound is larger than 1" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 10)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 10)).eval(data_file_nan) # type: ignore[arg-type] assert should_read, "Should match: upper bound is larger than 10" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("min_max_nan", 1)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("min_max_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] assert should_read, "Should match: no visibility" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan_null_bounds", 1)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan_null_bounds", 1)).eval(data_file_nan) # type: ignore[arg-type] assert not should_read, "Should not match: all nan column doesn't contain number" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 1)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 1)).eval(data_file_nan) # type: ignore[arg-type] assert should_read, "Should match: 1 is smaller than upper bound" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 10)).eval( + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 10)).eval( # type: ignore[arg-type] data_file_nan ) assert should_read, "Should match: 10 is smaller than upper bound" - should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 30)).eval(data_file_nan) + should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 30)).eval(data_file_nan) # type: ignore[arg-type] assert not should_read, "Should not match: 30 is greater than upper bound" diff --git a/tests/expressions/test_visitors.py b/tests/expressions/test_visitors.py index d61c193719..94bfcf076c 100644 --- a/tests/expressions/test_visitors.py +++ b/tests/expressions/test_visitors.py @@ -947,95 +947,95 @@ def manifest() -> ManifestFile: def test_all_nulls(schema: Schema, manifest: ManifestFile) -> None: - assert not _ManifestEvalVisitor(schema, NotNull(Reference("all_nulls_missing_nan")), case_sensitive=True).eval(manifest), ( - "Should skip: all nulls column with non-floating type contains all null" - ) + assert not _ManifestEvalVisitor(schema, NotNull(Reference("all_nulls_missing_nan")), case_sensitive=True).eval( + manifest + ), "Should skip: all nulls column with non-floating type contains all null" - assert _ManifestEvalVisitor(schema, NotNull(Reference("all_nulls_missing_nan_float")), case_sensitive=True).eval(manifest), ( - "Should read: no NaN information may indicate presence of NaN value" - ) + assert _ManifestEvalVisitor(schema, NotNull(Reference("all_nulls_missing_nan_float")), case_sensitive=True).eval( + manifest + ), "Should read: no NaN information may indicate presence of NaN value" - assert _ManifestEvalVisitor(schema, NotNull(Reference("some_nulls")), case_sensitive=True).eval(manifest), ( - "Should read: column with some nulls 
contains a non-null value" - ) + assert _ManifestEvalVisitor(schema, NotNull(Reference("some_nulls")), case_sensitive=True).eval( + manifest + ), "Should read: column with some nulls contains a non-null value" - assert _ManifestEvalVisitor(schema, NotNull(Reference("no_nulls")), case_sensitive=True).eval(manifest), ( - "Should read: non-null column contains a non-null value" - ) + assert _ManifestEvalVisitor(schema, NotNull(Reference("no_nulls")), case_sensitive=True).eval( + manifest + ), "Should read: non-null column contains a non-null value" def test_no_nulls(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, IsNull(Reference("all_nulls_missing_nan")), case_sensitive=True).eval(manifest), ( - "Should read: at least one null value in all null column" - ) + assert _ManifestEvalVisitor(schema, IsNull(Reference("all_nulls_missing_nan")), case_sensitive=True).eval( + manifest + ), "Should read: at least one null value in all null column" - assert _ManifestEvalVisitor(schema, IsNull(Reference("some_nulls")), case_sensitive=True).eval(manifest), ( - "Should read: column with some nulls contains a null value" - ) + assert _ManifestEvalVisitor(schema, IsNull(Reference("some_nulls")), case_sensitive=True).eval( + manifest + ), "Should read: column with some nulls contains a null value" - assert not _ManifestEvalVisitor(schema, IsNull(Reference("no_nulls")), case_sensitive=True).eval(manifest), ( - "Should skip: non-null column contains no null values" - ) + assert not _ManifestEvalVisitor(schema, IsNull(Reference("no_nulls")), case_sensitive=True).eval( + manifest + ), "Should skip: non-null column contains no null values" - assert _ManifestEvalVisitor(schema, IsNull(Reference("both_nan_and_null")), case_sensitive=True).eval(manifest), ( - "Should read: both_nan_and_null column contains no null values" - ) + assert _ManifestEvalVisitor(schema, IsNull(Reference("both_nan_and_null")), case_sensitive=True).eval( + manifest + ), "Should read: both_nan_and_null column contains no null values" def test_is_nan(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, IsNaN(Reference("float")), case_sensitive=True).eval(manifest), ( - "Should read: no information on if there are nan value in float column" - ) + assert _ManifestEvalVisitor(schema, IsNaN(Reference("float")), case_sensitive=True).eval( + manifest + ), "Should read: no information on if there are nan value in float column" - assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_double")), case_sensitive=True).eval(manifest), ( - "Should read: no NaN information may indicate presence of NaN value" - ) + assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_double")), case_sensitive=True).eval( + manifest + ), "Should read: no NaN information may indicate presence of NaN value" - assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_missing_nan_float")), case_sensitive=True).eval(manifest), ( - "Should read: no NaN information may indicate presence of NaN value" - ) + assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_missing_nan_float")), case_sensitive=True).eval( + manifest + ), "Should read: no NaN information may indicate presence of NaN value" - assert not _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_no_nans")), case_sensitive=True).eval(manifest), ( - "Should skip: no nan column doesn't contain nan value" - ) + assert not _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_no_nans")), case_sensitive=True).eval( + 
manifest + ), "Should skip: no nan column doesn't contain nan value" - assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nans")), case_sensitive=True).eval(manifest), ( - "Should read: all_nans column contains nan value" - ) + assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nans")), case_sensitive=True).eval( + manifest + ), "Should read: all_nans column contains nan value" - assert _ManifestEvalVisitor(schema, IsNaN(Reference("both_nan_and_null")), case_sensitive=True).eval(manifest), ( - "Should read: both_nan_and_null column contains nan value" - ) + assert _ManifestEvalVisitor(schema, IsNaN(Reference("both_nan_and_null")), case_sensitive=True).eval( + manifest + ), "Should read: both_nan_and_null column contains nan value" - assert not _ManifestEvalVisitor(schema, IsNaN(Reference("no_nan_or_null")), case_sensitive=True).eval(manifest), ( - "Should skip: no_nan_or_null column doesn't contain nan value" - ) + assert not _ManifestEvalVisitor(schema, IsNaN(Reference("no_nan_or_null")), case_sensitive=True).eval( + manifest + ), "Should skip: no_nan_or_null column doesn't contain nan value" def test_not_nan(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, NotNaN(Reference("float")), case_sensitive=True).eval(manifest), ( - "Should read: no information on if there are nan value in float column" - ) + assert _ManifestEvalVisitor(schema, NotNaN(Reference("float")), case_sensitive=True).eval( + manifest + ), "Should read: no information on if there are nan value in float column" - assert _ManifestEvalVisitor(schema, NotNaN(Reference("all_nulls_double")), case_sensitive=True).eval(manifest), ( - "Should read: all null column contains non nan value" - ) + assert _ManifestEvalVisitor(schema, NotNaN(Reference("all_nulls_double")), case_sensitive=True).eval( + manifest + ), "Should read: all null column contains non nan value" - assert _ManifestEvalVisitor(schema, NotNaN(Reference("all_nulls_no_nans")), case_sensitive=True).eval(manifest), ( - "Should read: no_nans column contains non nan value" - ) + assert _ManifestEvalVisitor(schema, NotNaN(Reference("all_nulls_no_nans")), case_sensitive=True).eval( + manifest + ), "Should read: no_nans column contains non nan value" - assert not _ManifestEvalVisitor(schema, NotNaN(Reference("all_nans")), case_sensitive=True).eval(manifest), ( - "Should skip: all nans column doesn't contain non nan value" - ) + assert not _ManifestEvalVisitor(schema, NotNaN(Reference("all_nans")), case_sensitive=True).eval( + manifest + ), "Should skip: all nans column doesn't contain non nan value" - assert _ManifestEvalVisitor(schema, NotNaN(Reference("both_nan_and_null")), case_sensitive=True).eval(manifest), ( - "Should read: both_nan_and_null nans column contains non nan value" - ) + assert _ManifestEvalVisitor(schema, NotNaN(Reference("both_nan_and_null")), case_sensitive=True).eval( + manifest + ), "Should read: both_nan_and_null nans column contains non nan value" - assert _ManifestEvalVisitor(schema, NotNaN(Reference("no_nan_or_null")), case_sensitive=True).eval(manifest), ( - "Should read: no_nan_or_null column contains non nan value" - ) + assert _ManifestEvalVisitor(schema, NotNaN(Reference("no_nan_or_null")), case_sensitive=True).eval( + manifest + ), "Should read: no_nan_or_null column contains non nan value" def test_missing_stats(schema: Schema, manifest_no_stats: ManifestFile) -> None: @@ -1053,15 +1053,15 @@ def test_missing_stats(schema: Schema, manifest_no_stats: ManifestFile) -> None: ] for expr in 
expressions: - assert _ManifestEvalVisitor(schema, expr, case_sensitive=True).eval(manifest_no_stats), ( - f"Should read when missing stats for expr: {expr}" - ) + assert _ManifestEvalVisitor(schema, expr, case_sensitive=True).eval( + manifest_no_stats + ), f"Should read when missing stats for expr: {expr}" def test_not(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, Not(LessThan(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval(manifest), ( - "Should read: not(false)" - ) + assert _ManifestEvalVisitor(schema, Not(LessThan(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval( + manifest + ), "Should read: not(false)" assert not _ManifestEvalVisitor(schema, Not(GreaterThan(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval( manifest @@ -1118,21 +1118,21 @@ def test_or(schema: Schema, manifest: ManifestFile) -> None: def test_integer_lt(schema: Schema, manifest: ManifestFile) -> None: - assert not _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval(manifest), ( - "Should not read: id range below lower bound (5 < 30)" - ) + assert not _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval( + manifest + ), "Should not read: id range below lower bound (5 < 30)" - assert not _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(manifest), ( - "Should not read: id range below lower bound (30 is not < 30)" - ) + assert not _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval( + manifest + ), "Should not read: id range below lower bound (30 is not < 30)" - assert _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE + 1), case_sensitive=True).eval(manifest), ( - "Should read: one possible id" - ) + assert _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE + 1), case_sensitive=True).eval( + manifest + ), "Should read: one possible id" - assert _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( - "Should read: may possible ids" - ) + assert _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( + manifest + ), "Should read: may possible ids" def test_integer_lt_eq(schema: Schema, manifest: ManifestFile) -> None: @@ -1144,13 +1144,13 @@ def test_integer_lt_eq(schema: Schema, manifest: ManifestFile) -> None: manifest ), "Should not read: id range below lower bound (29 < 30)" - assert _ManifestEvalVisitor(schema, LessThanOrEqual(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(manifest), ( - "Should read: one possible id" - ) + assert _ManifestEvalVisitor(schema, LessThanOrEqual(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval( + manifest + ), "Should read: one possible id" - assert _ManifestEvalVisitor(schema, LessThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( - "Should read: many possible ids" - ) + assert _ManifestEvalVisitor(schema, LessThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( + manifest + ), "Should read: many possible ids" def test_integer_gt(schema: Schema, manifest: ManifestFile) -> None: @@ -1158,17 +1158,17 @@ def test_integer_gt(schema: Schema, manifest: ManifestFile) -> None: manifest ), "Should not read: id range above upper bound (85 < 79)" - assert not _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), 
INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( - "Should not read: id range above upper bound (79 is not > 79)" - ) + assert not _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( + manifest + ), "Should not read: id range above upper bound (79 is not > 79)" - assert _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE - 1), case_sensitive=True).eval(manifest), ( - "Should read: one possible id" - ) + assert _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE - 1), case_sensitive=True).eval( + manifest + ), "Should read: one possible id" - assert _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval(manifest), ( - "Should read: may possible ids" - ) + assert _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval( + manifest + ), "Should read: may possible ids" def test_integer_gt_eq(schema: Schema, manifest: ManifestFile) -> None: @@ -1180,133 +1180,133 @@ def test_integer_gt_eq(schema: Schema, manifest: ManifestFile) -> None: manifest ), "Should not read: id range above upper bound (80 > 79)" - assert _ManifestEvalVisitor(schema, GreaterThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( - "Should read: one possible id" - ) + assert _ManifestEvalVisitor(schema, GreaterThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( + manifest + ), "Should read: one possible id" - assert _ManifestEvalVisitor(schema, GreaterThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( - "Should read: may possible ids" - ) + assert _ManifestEvalVisitor(schema, GreaterThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( + manifest + ), "Should read: may possible ids" def test_integer_eq(schema: Schema, manifest: ManifestFile) -> None: - assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval(manifest), ( - "Should not read: id below lower bound" - ) + assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval( + manifest + ), "Should not read: id below lower bound" - assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE - 1), case_sensitive=True).eval(manifest), ( - "Should not read: id below lower bound" - ) + assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE - 1), case_sensitive=True).eval( + manifest + ), "Should not read: id below lower bound" - assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(manifest), ( - "Should read: id equal to lower bound" - ) + assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval( + manifest + ), "Should read: id equal to lower bound" - assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval(manifest), ( - "Should read: id between lower and upper bounds" - ) + assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval( + manifest + ), "Should read: id between lower and upper bounds" - assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( - "Should read: id equal to upper bound" - ) + assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE), 
case_sensitive=True).eval( + manifest + ), "Should read: id equal to upper bound" - assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE + 1), case_sensitive=True).eval(manifest), ( - "Should not read: id above upper bound" - ) + assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE + 1), case_sensitive=True).eval( + manifest + ), "Should not read: id above upper bound" - assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE + 6), case_sensitive=True).eval(manifest), ( - "Should not read: id above upper bound" - ) + assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE + 6), case_sensitive=True).eval( + manifest + ), "Should not read: id above upper bound" def test_integer_not_eq(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval(manifest), ( - "Should read: id below lower bound" - ) + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval( + manifest + ), "Should read: id below lower bound" - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE - 1), case_sensitive=True).eval(manifest), ( - "Should read: id below lower bound" - ) + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE - 1), case_sensitive=True).eval( + manifest + ), "Should read: id below lower bound" - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(manifest), ( - "Should read: id equal to lower bound" - ) + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval( + manifest + ), "Should read: id equal to lower bound" - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval(manifest), ( - "Should read: id between lower and upper bounds" - ) + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval( + manifest + ), "Should read: id between lower and upper bounds" - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), ( - "Should read: id equal to upper bound" - ) + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval( + manifest + ), "Should read: id equal to upper bound" - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE + 1), case_sensitive=True).eval(manifest), ( - "Should read: id above upper bound" - ) + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE + 1), case_sensitive=True).eval( + manifest + ), "Should read: id above upper bound" - assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE + 6), case_sensitive=True).eval(manifest), ( - "Should read: id above upper bound" - ) + assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE + 6), case_sensitive=True).eval( + manifest + ), "Should read: id above upper bound" def test_integer_not_eq_rewritten(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval(manifest), ( - "Should read: id below lower bound" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE - 25)), 
case_sensitive=True).eval( + manifest + ), "Should read: id below lower bound" - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE - 1)), case_sensitive=True).eval(manifest), ( - "Should read: id below lower bound" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE - 1)), case_sensitive=True).eval( + manifest + ), "Should read: id below lower bound" - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE)), case_sensitive=True).eval(manifest), ( - "Should read: id equal to lower bound" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE)), case_sensitive=True).eval( + manifest + ), "Should read: id equal to lower bound" - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE - 4)), case_sensitive=True).eval(manifest), ( - "Should read: id between lower and upper bounds" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE - 4)), case_sensitive=True).eval( + manifest + ), "Should read: id between lower and upper bounds" - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE)), case_sensitive=True).eval(manifest), ( - "Should read: id equal to upper bound" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE)), case_sensitive=True).eval( + manifest + ), "Should read: id equal to upper bound" - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE + 1)), case_sensitive=True).eval(manifest), ( - "Should read: id above upper bound" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE + 1)), case_sensitive=True).eval( + manifest + ), "Should read: id above upper bound" - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE + 6)), case_sensitive=True).eval(manifest), ( - "Should read: id above upper bound" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE + 6)), case_sensitive=True).eval( + manifest + ), "Should read: id above upper bound" def test_integer_not_eq_rewritten_case_insensitive(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE - 25)), case_sensitive=False).eval(manifest), ( - "Should read: id below lower bound" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE - 25)), case_sensitive=False).eval( + manifest + ), "Should read: id below lower bound" - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE - 1)), case_sensitive=False).eval(manifest), ( - "Should read: id below lower bound" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE - 1)), case_sensitive=False).eval( + manifest + ), "Should read: id below lower bound" - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE)), case_sensitive=False).eval(manifest), ( - "Should read: id equal to lower bound" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE)), case_sensitive=False).eval( + manifest + ), "Should read: id equal to lower bound" - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE - 4)), case_sensitive=False).eval(manifest), ( - "Should read: id between lower and upper bounds" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE - 4)), case_sensitive=False).eval( + manifest + ), "Should 
read: id between lower and upper bounds" - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE)), case_sensitive=False).eval(manifest), ( - "Should read: id equal to upper bound" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE)), case_sensitive=False).eval( + manifest + ), "Should read: id equal to upper bound" - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE + 1)), case_sensitive=False).eval(manifest), ( - "Should read: id above upper bound" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE + 1)), case_sensitive=False).eval( + manifest + ), "Should read: id above upper bound" - assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE + 6)), case_sensitive=False).eval(manifest), ( - "Should read: id above upper bound" - ) + assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE + 6)), case_sensitive=False).eval( + manifest + ), "Should read: id above upper bound" def test_integer_in(schema: Schema, manifest: ManifestFile) -> None: @@ -1342,13 +1342,13 @@ def test_integer_in(schema: Schema, manifest: ManifestFile) -> None: manifest ), "Should skip: in on all nulls column" - assert _ManifestEvalVisitor(schema, In(Reference("some_nulls"), ("abc", "def")), case_sensitive=True).eval(manifest), ( - "Should read: in on some nulls column" - ) + assert _ManifestEvalVisitor(schema, In(Reference("some_nulls"), ("abc", "def")), case_sensitive=True).eval( + manifest + ), "Should read: in on some nulls column" - assert _ManifestEvalVisitor(schema, In(Reference("no_nulls"), ("abc", "def")), case_sensitive=True).eval(manifest), ( - "Should read: in on no nulls column" - ) + assert _ManifestEvalVisitor(schema, In(Reference("no_nulls"), ("abc", "def")), case_sensitive=True).eval( + manifest + ), "Should read: in on no nulls column" def test_integer_not_in(schema: Schema, manifest: ManifestFile) -> None: @@ -1384,73 +1384,73 @@ def test_integer_not_in(schema: Schema, manifest: ManifestFile) -> None: manifest ), "Should read: notIn on no nulls column" - assert _ManifestEvalVisitor(schema, NotIn(Reference("some_nulls"), ("abc", "def")), case_sensitive=True).eval(manifest), ( - "Should read: in on some nulls column" - ) + assert _ManifestEvalVisitor(schema, NotIn(Reference("some_nulls"), ("abc", "def")), case_sensitive=True).eval( + manifest + ), "Should read: in on some nulls column" - assert _ManifestEvalVisitor(schema, NotIn(Reference("no_nulls"), ("abc", "def")), case_sensitive=True).eval(manifest), ( - "Should read: in on no nulls column" - ) + assert _ManifestEvalVisitor(schema, NotIn(Reference("no_nulls"), ("abc", "def")), case_sensitive=True).eval( + manifest + ), "Should read: in on no nulls column" def test_string_starts_with(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "a"), case_sensitive=False).eval(manifest), ( - "Should read: range matches" - ) + assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "a"), case_sensitive=False).eval( + manifest + ), "Should read: range matches" - assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "aa"), case_sensitive=False).eval(manifest), ( - "Should read: range matches" - ) + assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "aa"), case_sensitive=False).eval( + manifest + ), "Should read: range matches" - assert _ManifestEvalVisitor(schema, 
StartsWith(Reference("some_nulls"), "dddd"), case_sensitive=False).eval(manifest), ( - "Should read: range matches" - ) + assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "dddd"), case_sensitive=False).eval( + manifest + ), "Should read: range matches" - assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "z"), case_sensitive=False).eval(manifest), ( - "Should read: range matches" - ) + assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "z"), case_sensitive=False).eval( + manifest + ), "Should read: range matches" - assert _ManifestEvalVisitor(schema, StartsWith(Reference("no_nulls"), "a"), case_sensitive=False).eval(manifest), ( - "Should read: range matches" - ) + assert _ManifestEvalVisitor(schema, StartsWith(Reference("no_nulls"), "a"), case_sensitive=False).eval( + manifest + ), "Should read: range matches" - assert not _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "zzzz"), case_sensitive=False).eval(manifest), ( - "Should skip: range doesn't match" - ) + assert not _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "zzzz"), case_sensitive=False).eval( + manifest + ), "Should skip: range doesn't match" - assert not _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "1"), case_sensitive=False).eval(manifest), ( - "Should skip: range doesn't match" - ) + assert not _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "1"), case_sensitive=False).eval( + manifest + ), "Should skip: range doesn't match" def test_string_not_starts_with(schema: Schema, manifest: ManifestFile) -> None: - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "a"), case_sensitive=False).eval(manifest), ( - "Should read: range matches" - ) + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "a"), case_sensitive=False).eval( + manifest + ), "Should read: range matches" - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "aa"), case_sensitive=False).eval(manifest), ( - "Should read: range matches" - ) + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "aa"), case_sensitive=False).eval( + manifest + ), "Should read: range matches" - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "dddd"), case_sensitive=False).eval(manifest), ( - "Should read: range matches" - ) + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "dddd"), case_sensitive=False).eval( + manifest + ), "Should read: range matches" - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "z"), case_sensitive=False).eval(manifest), ( - "Should read: range matches" - ) + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "z"), case_sensitive=False).eval( + manifest + ), "Should read: range matches" - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("no_nulls"), "a"), case_sensitive=False).eval(manifest), ( - "Should read: range matches" - ) + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("no_nulls"), "a"), case_sensitive=False).eval( + manifest + ), "Should read: range matches" - assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "zzzz"), case_sensitive=False).eval(manifest), ( - "Should read: range matches" - ) + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "zzzz"), case_sensitive=False).eval( + manifest + ), "Should read: range matches" - assert 
_ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "1"), case_sensitive=False).eval(manifest), ( - "Should read: range matches" - ) + assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "1"), case_sensitive=False).eval( + manifest + ), "Should read: range matches" assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("all_same_value_or_null"), "a"), case_sensitive=False).eval( manifest diff --git a/tests/integration/test_add_files.py b/tests/integration/test_add_files.py index 85e626edf4..c1d916e0e0 100644 --- a/tests/integration/test_add_files.py +++ b/tests/integration/test_add_files.py @@ -52,12 +52,14 @@ NestedField(field_id=10, name="qux", field_type=DateType(), required=False), ) -ARROW_SCHEMA = pa.schema([ - ("foo", pa.bool_()), - ("bar", pa.string()), - ("baz", pa.int32()), - ("qux", pa.date32()), -]) +ARROW_SCHEMA = pa.schema( + [ + ("foo", pa.bool_()), + ("bar", pa.string()), + ("baz", pa.int32()), + ("qux", pa.date32()), + ] +) ARROW_TABLE = pa.Table.from_pylist( [ @@ -71,12 +73,14 @@ schema=ARROW_SCHEMA, ) -ARROW_SCHEMA_WITH_IDS = pa.schema([ - pa.field("foo", pa.bool_(), nullable=False, metadata={"PARQUET:field_id": "1"}), - pa.field("bar", pa.string(), nullable=False, metadata={"PARQUET:field_id": "2"}), - pa.field("baz", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "3"}), - pa.field("qux", pa.date32(), nullable=False, metadata={"PARQUET:field_id": "4"}), -]) +ARROW_SCHEMA_WITH_IDS = pa.schema( + [ + pa.field("foo", pa.bool_(), nullable=False, metadata={"PARQUET:field_id": "1"}), + pa.field("bar", pa.string(), nullable=False, metadata={"PARQUET:field_id": "2"}), + pa.field("baz", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "3"}), + pa.field("qux", pa.date32(), nullable=False, metadata={"PARQUET:field_id": "4"}), + ] +) ARROW_TABLE_WITH_IDS = pa.Table.from_pylist( @@ -91,12 +95,14 @@ schema=ARROW_SCHEMA_WITH_IDS, ) -ARROW_SCHEMA_UPDATED = pa.schema([ - ("foo", pa.bool_()), - ("baz", pa.int32()), - ("qux", pa.date32()), - ("quux", pa.int32()), -]) +ARROW_SCHEMA_UPDATED = pa.schema( + [ + ("foo", pa.bool_()), + ("baz", pa.int32()), + ("qux", pa.date32()), + ("quux", pa.int32()), + ] +) ARROW_TABLE_UPDATED = pa.Table.from_pylist( [ @@ -471,12 +477,14 @@ def test_add_files_fails_on_schema_mismatch(spark: SparkSession, session_catalog identifier = f"default.table_schema_mismatch_fails_v{format_version}" tbl = _create_table(session_catalog, identifier, format_version) - WRONG_SCHEMA = pa.schema([ - ("foo", pa.bool_()), - ("bar", pa.string()), - ("baz", pa.string()), # should be integer - ("qux", pa.date32()), - ]) + WRONG_SCHEMA = pa.schema( + [ + ("foo", pa.bool_()), + ("bar", pa.string()), + ("baz", pa.string()), # should be integer + ("qux", pa.date32()), + ] + ) file_path = f"s3://warehouse/default/table_schema_mismatch_fails/v{format_version}/test.parquet" # write parquet files fo = tbl.io.new_output(file_path) @@ -522,12 +530,16 @@ def test_add_files_with_large_and_regular_schema(spark: SparkSession, session_ca identifier = f"default.unpartitioned_with_large_types{format_version}" iceberg_schema = Schema(NestedField(1, "foo", StringType(), required=True)) - arrow_schema = pa.schema([ - pa.field("foo", pa.string(), nullable=False), - ]) - arrow_schema_large = pa.schema([ - pa.field("foo", pa.large_string(), nullable=False), - ]) + arrow_schema = pa.schema( + [ + pa.field("foo", pa.string(), nullable=False), + ] + ) + arrow_schema_large = pa.schema( + [ + pa.field("foo", pa.large_string(), nullable=False), 
+ ] + ) tbl = _create_table(session_catalog, identifier, format_version, schema=iceberg_schema) @@ -576,9 +588,11 @@ def test_add_files_with_large_and_regular_schema(spark: SparkSession, session_ca def test_add_files_with_timestamp_tz_ns_fails(session_catalog: Catalog, format_version: int, mocker: MockerFixture) -> None: nanoseconds_schema_iceberg = Schema(NestedField(1, "quux", TimestamptzType())) - nanoseconds_schema = pa.schema([ - ("quux", pa.timestamp("ns", tz="UTC")), - ]) + nanoseconds_schema = pa.schema( + [ + ("quux", pa.timestamp("ns", tz="UTC")), + ] + ) arrow_table = pa.Table.from_pylist( [ @@ -617,9 +631,11 @@ def test_add_file_with_valid_nullability_diff(spark: SparkSession, session_catal table_schema = Schema( NestedField(field_id=1, name="long", field_type=LongType(), required=False), ) - other_schema = pa.schema(( - pa.field("long", pa.int64(), nullable=False), # can support writing required pyarrow field to optional Iceberg field - )) + other_schema = pa.schema( + ( + pa.field("long", pa.int64(), nullable=False), # can support writing required pyarrow field to optional Iceberg field + ) + ) arrow_table = pa.Table.from_pydict( { "long": [1, 9], @@ -671,13 +687,15 @@ def test_add_files_with_valid_upcast( # table's long field should cast to long on read written_arrow_table = tbl.scan().to_arrow() assert written_arrow_table == pyarrow_table_with_promoted_types.cast( - pa.schema(( - pa.field("long", pa.int64(), nullable=True), - pa.field("list", pa.large_list(pa.int64()), nullable=False), - pa.field("map", pa.map_(pa.large_string(), pa.int64()), nullable=False), - pa.field("double", pa.float64(), nullable=True), - pa.field("uuid", pa.binary(length=16), nullable=True), # can UUID is read as fixed length binary of length 16 - )) + pa.schema( + ( + pa.field("long", pa.int64(), nullable=True), + pa.field("list", pa.large_list(pa.int64()), nullable=False), + pa.field("map", pa.map_(pa.large_string(), pa.int64()), nullable=False), + pa.field("double", pa.float64(), nullable=True), + pa.field("uuid", pa.binary(length=16), nullable=True), # can UUID is read as fixed length binary of length 16 + ) + ) ) lhs = spark.table(f"{identifier}").toPandas() rhs = written_arrow_table.to_pandas() diff --git a/tests/integration/test_deletes.py b/tests/integration/test_deletes.py index f2417bde2d..ae03beea53 100644 --- a/tests/integration/test_deletes.py +++ b/tests/integration/test_deletes.py @@ -746,13 +746,15 @@ def test_delete_after_partition_evolution_from_partitioned(session_catalog: Rest arrow_table = pa.Table.from_arrays( [ pa.array([2, 3, 4, 5, 6]), - pa.array([ - datetime(2021, 5, 19), - datetime(2022, 7, 25), - datetime(2023, 3, 22), - datetime(2024, 7, 17), - datetime(2025, 2, 22), - ]), + pa.array( + [ + datetime(2021, 5, 19), + datetime(2022, 7, 25), + datetime(2023, 3, 22), + datetime(2024, 7, 17), + datetime(2025, 2, 22), + ] + ), ], names=["idx", "ts"], ) diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index 0279c2199a..8d13724087 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -833,12 +833,14 @@ def test_table_scan_default_to_large_types(catalog: Catalog) -> None: result_table = tbl.scan().to_arrow() - expected_schema = pa.schema([ - pa.field("string", pa.large_string()), - pa.field("string-to-binary", pa.large_binary()), - pa.field("binary", pa.large_binary()), - pa.field("list", pa.large_list(pa.large_string())), - ]) + expected_schema = pa.schema( + [ + pa.field("string", pa.large_string()), + 
pa.field("string-to-binary", pa.large_binary()), + pa.field("binary", pa.large_binary()), + pa.field("list", pa.large_list(pa.large_string())), + ] + ) assert result_table.schema.equals(expected_schema) @@ -874,12 +876,14 @@ def test_table_scan_override_with_small_types(catalog: Catalog) -> None: tbl.io.properties[PYARROW_USE_LARGE_TYPES_ON_READ] = "False" result_table = tbl.scan().to_arrow() - expected_schema = pa.schema([ - pa.field("string", pa.string()), - pa.field("string-to-binary", pa.binary()), - pa.field("binary", pa.binary()), - pa.field("list", pa.list_(pa.string())), - ]) + expected_schema = pa.schema( + [ + pa.field("string", pa.string()), + pa.field("string-to-binary", pa.binary()), + pa.field("binary", pa.binary()), + pa.field("list", pa.list_(pa.string())), + ] + ) assert result_table.schema.equals(expected_schema) diff --git a/tests/integration/test_rest_schema.py b/tests/integration/test_rest_schema.py index 8e64142b3f..6a704839e2 100644 --- a/tests/integration/test_rest_schema.py +++ b/tests/integration/test_rest_schema.py @@ -685,11 +685,13 @@ def test_rename_simple(simple_table: Table) -> None: ) # Check that the name mapping gets updated - assert simple_table.name_mapping() == NameMapping([ - MappedField(field_id=1, names=["foo", "vo"]), - MappedField(field_id=2, names=["bar", "var"]), - MappedField(field_id=3, names=["baz"]), - ]) + assert simple_table.name_mapping() == NameMapping( + [ + MappedField(field_id=1, names=["foo", "vo"]), + MappedField(field_id=2, names=["bar", "var"]), + MappedField(field_id=3, names=["baz"]), + ] + ) @pytest.mark.integration @@ -719,9 +721,11 @@ def test_rename_simple_nested(catalog: Catalog) -> None: ) # Check that the name mapping gets updated - assert tbl.name_mapping() == NameMapping([ - MappedField(field_id=1, names=["foo"], fields=[MappedField(field_id=2, names=["bar", "vo"])]), - ]) + assert tbl.name_mapping() == NameMapping( + [ + MappedField(field_id=1, names=["foo"], fields=[MappedField(field_id=2, names=["bar", "vo"])]), + ] + ) @pytest.mark.integration diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index f9c0afd3bc..c23e836554 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -324,20 +324,24 @@ def test_python_writes_special_character_column_with_spark_reads( {"street": "789", "city": "Random", "zip": 10112, column_name_with_special_character: "c"}, ], } - pa_schema = pa.schema([ - pa.field(column_name_with_special_character, pa.string()), - pa.field("id", pa.int32()), - pa.field("name", pa.string()), - pa.field( - "address", - pa.struct([ - pa.field("street", pa.string()), - pa.field("city", pa.string()), - pa.field("zip", pa.int32()), - pa.field(column_name_with_special_character, pa.string()), - ]), - ), - ]) + pa_schema = pa.schema( + [ + pa.field(column_name_with_special_character, pa.string()), + pa.field("id", pa.int32()), + pa.field("name", pa.string()), + pa.field( + "address", + pa.struct( + [ + pa.field("street", pa.string()), + pa.field("city", pa.string()), + pa.field("zip", pa.int32()), + pa.field(column_name_with_special_character, pa.string()), + ] + ), + ), + ] + ) arrow_table_with_special_character_column = pa.Table.from_pydict(TEST_DATA_WITH_SPECIAL_CHARACTER_COLUMN, schema=pa_schema) tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema) @@ -357,10 +361,12 @@ def test_python_writes_dictionary_encoded_column_with_spark_reads( "id": [1, 2, 3, 
1, 1], "name": ["AB", "CD", "EF", "CD", "EF"], } - pa_schema = pa.schema([ - pa.field("id", pa.dictionary(pa.int32(), pa.int32(), False)), - pa.field("name", pa.dictionary(pa.int32(), pa.string(), False)), - ]) + pa_schema = pa.schema( + [ + pa.field("id", pa.dictionary(pa.int32(), pa.int32(), False)), + pa.field("name", pa.dictionary(pa.int32(), pa.string(), False)), + ] + ) arrow_table = pa.Table.from_pydict(TEST_DATA, schema=pa_schema) tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema) @@ -387,20 +393,24 @@ def test_python_writes_with_small_and_large_types_spark_reads( {"street": "789", "city": "Random", "zip": 10112, "bar": "c"}, ], } - pa_schema = pa.schema([ - pa.field("foo", pa.large_string()), - pa.field("id", pa.int32()), - pa.field("name", pa.string()), - pa.field( - "address", - pa.struct([ - pa.field("street", pa.string()), - pa.field("city", pa.string()), - pa.field("zip", pa.int32()), - pa.field("bar", pa.large_string()), - ]), - ), - ]) + pa_schema = pa.schema( + [ + pa.field("foo", pa.large_string()), + pa.field("id", pa.int32()), + pa.field("name", pa.string()), + pa.field( + "address", + pa.struct( + [ + pa.field("street", pa.string()), + pa.field("city", pa.string()), + pa.field("zip", pa.int32()), + pa.field("bar", pa.large_string()), + ] + ), + ), + ] + ) arrow_table = pa.Table.from_pydict(TEST_DATA, schema=pa_schema) tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema) @@ -409,20 +419,24 @@ def test_python_writes_with_small_and_large_types_spark_reads( pyiceberg_df = tbl.scan().to_pandas() assert spark_df.equals(pyiceberg_df) arrow_table_on_read = tbl.scan().to_arrow() - assert arrow_table_on_read.schema == pa.schema([ - pa.field("foo", pa.large_string()), - pa.field("id", pa.int32()), - pa.field("name", pa.large_string()), - pa.field( - "address", - pa.struct([ - pa.field("street", pa.large_string()), - pa.field("city", pa.large_string()), - pa.field("zip", pa.int32()), - pa.field("bar", pa.large_string()), - ]), - ), - ]) + assert arrow_table_on_read.schema == pa.schema( + [ + pa.field("foo", pa.large_string()), + pa.field("id", pa.int32()), + pa.field("name", pa.large_string()), + pa.field( + "address", + pa.struct( + [ + pa.field("street", pa.large_string()), + pa.field("city", pa.large_string()), + pa.field("zip", pa.int32()), + pa.field("bar", pa.large_string()), + ] + ), + ), + ] + ) @pytest.mark.integration @@ -718,10 +732,12 @@ def test_write_and_evolve(session_catalog: Catalog, format_version: int) -> None "foo": ["a", None, "z"], "bar": [19, None, 25], }, - schema=pa.schema([ - pa.field("foo", pa.string(), nullable=True), - pa.field("bar", pa.int32(), nullable=True), - ]), + schema=pa.schema( + [ + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=True), + ] + ), ) with tbl.transaction() as txn: @@ -761,10 +777,12 @@ def test_create_table_transaction(catalog: Catalog, format_version: int) -> None "foo": ["a", None, "z"], "bar": [19, None, 25], }, - schema=pa.schema([ - pa.field("foo", pa.string(), nullable=True), - pa.field("bar", pa.int32(), nullable=True), - ]), + schema=pa.schema( + [ + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=True), + ] + ), ) with catalog.create_table_transaction( @@ -810,9 +828,9 @@ def test_create_table_with_non_default_values(catalog: Catalog, table_schema_wit except NoSuchTableError: pass - iceberg_spec = PartitionSpec(*[ - PartitionField(source_id=2, 
field_id=1001, transform=IdentityTransform(), name="integer_partition") - ]) + iceberg_spec = PartitionSpec( + *[PartitionField(source_id=2, field_id=1001, transform=IdentityTransform(), name="integer_partition")] + ) sort_order = SortOrder(*[SortField(source_id=2, transform=IdentityTransform(), direction=SortDirection.ASC)]) @@ -1071,9 +1089,11 @@ def test_table_write_schema_with_valid_nullability_diff( table_schema = Schema( NestedField(field_id=1, name="long", field_type=LongType(), required=False), ) - other_schema = pa.schema(( - pa.field("long", pa.int64(), nullable=False), # can support writing required pyarrow field to optional Iceberg field - )) + other_schema = pa.schema( + ( + pa.field("long", pa.int64(), nullable=False), # can support writing required pyarrow field to optional Iceberg field + ) + ) arrow_table = pa.Table.from_pydict( { "long": [1, 9], @@ -1114,13 +1134,15 @@ def test_table_write_schema_with_valid_upcast( # table's long field should cast to long on read written_arrow_table = tbl.scan().to_arrow() assert written_arrow_table == pyarrow_table_with_promoted_types.cast( - pa.schema(( - pa.field("long", pa.int64(), nullable=True), - pa.field("list", pa.large_list(pa.int64()), nullable=False), - pa.field("map", pa.map_(pa.large_string(), pa.int64()), nullable=False), - pa.field("double", pa.float64(), nullable=True), # can support upcasting float to double - pa.field("uuid", pa.binary(length=16), nullable=True), # can UUID is read as fixed length binary of length 16 - )) + pa.schema( + ( + pa.field("long", pa.int64(), nullable=True), + pa.field("list", pa.large_list(pa.int64()), nullable=False), + pa.field("map", pa.map_(pa.large_string(), pa.int64()), nullable=False), + pa.field("double", pa.float64(), nullable=True), # can support upcasting float to double + pa.field("uuid", pa.binary(length=16), nullable=True), # can UUID is read as fixed length binary of length 16 + ) + ) ) lhs = spark.table(f"{identifier}").toPandas() rhs = written_arrow_table.to_pandas() @@ -1510,16 +1532,20 @@ def test_rewrite_manifest_after_partition_evolution(session_catalog: Catalog) -> def test_writing_null_structs(session_catalog: Catalog) -> None: import pyarrow as pa - schema = pa.schema([ - pa.field( - "struct_field_1", - pa.struct([ - pa.field("string_nested_1", pa.string()), - pa.field("int_item_2", pa.int32()), - pa.field("float_item_2", pa.float32()), - ]), - ), - ]) + schema = pa.schema( + [ + pa.field( + "struct_field_1", + pa.struct( + [ + pa.field("string_nested_1", pa.string()), + pa.field("int_item_2", pa.int32()), + pa.field("float_item_2", pa.float32()), + ] + ), + ), + ] + ) records = [ { diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py index e4017e1df5..8bb97e150a 100644 --- a/tests/io/test_pyarrow.py +++ b/tests/io/test_pyarrow.py @@ -547,11 +547,13 @@ def test_binary_type_to_pyarrow() -> None: def test_struct_type_to_pyarrow(table_schema_simple: Schema) -> None: - expected = pa.struct([ - pa.field("foo", pa.large_string(), nullable=True, metadata={"field_id": "1"}), - pa.field("bar", pa.int32(), nullable=False, metadata={"field_id": "2"}), - pa.field("baz", pa.bool_(), nullable=True, metadata={"field_id": "3"}), - ]) + expected = pa.struct( + [ + pa.field("foo", pa.large_string(), nullable=True, metadata={"field_id": "1"}), + pa.field("bar", pa.int32(), nullable=False, metadata={"field_id": "2"}), + pa.field("baz", pa.bool_(), nullable=True, metadata={"field_id": "3"}), + ] + ) assert visit(table_schema_simple.as_struct(), _ConvertToArrowSchema()) == 
expected @@ -1771,11 +1773,13 @@ def test_bin_pack_arrow_table(arrow_table_with_null: pa.Table) -> None: def test_schema_mismatch_type(table_schema_simple: Schema) -> None: - other_schema = pa.schema(( - pa.field("foo", pa.string(), nullable=True), - pa.field("bar", pa.decimal128(18, 6), nullable=False), - pa.field("baz", pa.bool_(), nullable=True), - )) + other_schema = pa.schema( + ( + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.decimal128(18, 6), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + ) + ) expected = r"""Mismatch in fields: ┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ @@ -1792,11 +1796,13 @@ def test_schema_mismatch_type(table_schema_simple: Schema) -> None: def test_schema_mismatch_nullability(table_schema_simple: Schema) -> None: - other_schema = pa.schema(( - pa.field("foo", pa.string(), nullable=True), - pa.field("bar", pa.int32(), nullable=True), - pa.field("baz", pa.bool_(), nullable=True), - )) + other_schema = pa.schema( + ( + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=True), + pa.field("baz", pa.bool_(), nullable=True), + ) + ) expected = """Mismatch in fields: ┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓ @@ -1813,11 +1819,13 @@ def test_schema_mismatch_nullability(table_schema_simple: Schema) -> None: def test_schema_compatible_nullability_diff(table_schema_simple: Schema) -> None: - other_schema = pa.schema(( - pa.field("foo", pa.string(), nullable=True), - pa.field("bar", pa.int32(), nullable=False), - pa.field("baz", pa.bool_(), nullable=False), - )) + other_schema = pa.schema( + ( + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=False), + ) + ) try: _check_pyarrow_schema_compatible(table_schema_simple, other_schema) @@ -1826,10 +1834,12 @@ def test_schema_compatible_nullability_diff(table_schema_simple: Schema) -> None def test_schema_mismatch_missing_field(table_schema_simple: Schema) -> None: - other_schema = pa.schema(( - pa.field("foo", pa.string(), nullable=True), - pa.field("baz", pa.bool_(), nullable=True), - )) + other_schema = pa.schema( + ( + pa.field("foo", pa.string(), nullable=True), + pa.field("baz", pa.bool_(), nullable=True), + ) + ) expected = """Mismatch in fields: ┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓ @@ -1851,9 +1861,11 @@ def test_schema_compatible_missing_nullable_field_nested(table_schema_nested: Sc 6, pa.field( "person", - pa.struct([ - pa.field("age", pa.int32(), nullable=False), - ]), + pa.struct( + [ + pa.field("age", pa.int32(), nullable=False), + ] + ), nullable=True, ), ) @@ -1869,9 +1881,11 @@ def test_schema_mismatch_missing_required_field_nested(table_schema_nested: Sche 6, pa.field( "person", - pa.struct([ - pa.field("name", pa.string(), nullable=True), - ]), + pa.struct( + [ + pa.field("name", pa.string(), nullable=True), + ] + ), nullable=True, ), ) @@ -1920,12 +1934,14 @@ def test_schema_compatible_nested(table_schema_nested: Schema) -> None: def test_schema_mismatch_additional_field(table_schema_simple: Schema) -> None: - other_schema = pa.schema(( - pa.field("foo", pa.string(), nullable=True), - pa.field("bar", pa.int32(), nullable=False), - pa.field("baz", pa.bool_(), nullable=True), - pa.field("new_field", pa.date32(), nullable=True), - )) + other_schema = pa.schema( + ( + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), 
nullable=True), + pa.field("new_field", pa.date32(), nullable=True), + ) + ) with pytest.raises( ValueError, match=r"PyArrow table contains more columns: new_field. Update the schema first \(hint, use union_by_name\)." @@ -1942,10 +1958,12 @@ def test_schema_compatible(table_schema_simple: Schema) -> None: def test_schema_projection(table_schema_simple: Schema) -> None: # remove optional `baz` field from `table_schema_simple` - other_schema = pa.schema(( - pa.field("foo", pa.string(), nullable=True), - pa.field("bar", pa.int32(), nullable=False), - )) + other_schema = pa.schema( + ( + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + ) + ) try: _check_pyarrow_schema_compatible(table_schema_simple, other_schema) except Exception: @@ -1954,11 +1972,13 @@ def test_schema_projection(table_schema_simple: Schema) -> None: def test_schema_downcast(table_schema_simple: Schema) -> None: # large_string type is compatible with string type - other_schema = pa.schema(( - pa.field("foo", pa.large_string(), nullable=True), - pa.field("bar", pa.int32(), nullable=False), - pa.field("baz", pa.bool_(), nullable=True), - )) + other_schema = pa.schema( + ( + pa.field("foo", pa.large_string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + ) + ) try: _check_pyarrow_schema_compatible(table_schema_simple, other_schema) @@ -2037,11 +2057,13 @@ def test_identity_partition_on_multi_columns() -> None: assert {table_partition.partition_key.partition for table_partition in result} == expected concatenated_arrow_table = pa.concat_tables([table_partition.arrow_table_partition for table_partition in result]) assert concatenated_arrow_table.num_rows == arrow_table.num_rows - assert concatenated_arrow_table.sort_by([ - ("born_year", "ascending"), - ("n_legs", "ascending"), - ("animal", "ascending"), - ]) == arrow_table.sort_by([("born_year", "ascending"), ("n_legs", "ascending"), ("animal", "ascending")]) + assert concatenated_arrow_table.sort_by( + [ + ("born_year", "ascending"), + ("n_legs", "ascending"), + ("animal", "ascending"), + ] + ) == arrow_table.sort_by([("born_year", "ascending"), ("n_legs", "ascending"), ("animal", "ascending")]) def test__to_requested_schema_timestamps( diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py index 9e6df720c6..027fccae7c 100644 --- a/tests/io/test_pyarrow_visitor.py +++ b/tests/io/test_pyarrow_visitor.py @@ -239,11 +239,13 @@ def test_pyarrow_variable_binary_to_iceberg() -> None: def test_pyarrow_struct_to_iceberg() -> None: - pyarrow_struct = pa.struct([ - pa.field("foo", pa.string(), nullable=True, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), - pa.field("bar", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "2"}), - pa.field("baz", pa.bool_(), nullable=True, metadata={"PARQUET:field_id": "3"}), - ]) + pyarrow_struct = pa.struct( + [ + pa.field("foo", pa.string(), nullable=True, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), + pa.field("bar", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "2"}), + pa.field("baz", pa.bool_(), nullable=True, metadata={"PARQUET:field_id": "3"}), + ] + ) expected = StructType( NestedField(field_id=1, name="foo", field_type=StringType(), required=False, doc="foo doc"), NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), @@ -344,84 +346,94 @@ def test_round_schema_large_string() -> None: def test_simple_schema_has_missing_ids() -> None: - schema = 
pa.schema([ - pa.field("foo", pa.string(), nullable=False), - ]) + schema = pa.schema( + [ + pa.field("foo", pa.string(), nullable=False), + ] + ) visitor = _HasIds() has_ids = visit_pyarrow(schema, visitor) assert not has_ids def test_simple_schema_has_missing_ids_partial() -> None: - schema = pa.schema([ - pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), - pa.field("bar", pa.int32(), nullable=False), - ]) + schema = pa.schema( + [ + pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), + pa.field("bar", pa.int32(), nullable=False), + ] + ) visitor = _HasIds() has_ids = visit_pyarrow(schema, visitor) assert not has_ids def test_nested_schema_has_missing_ids() -> None: - schema = pa.schema([ - pa.field("foo", pa.string(), nullable=False), - pa.field( - "quux", - pa.map_( - pa.string(), - pa.map_(pa.string(), pa.int32()), + schema = pa.schema( + [ + pa.field("foo", pa.string(), nullable=False), + pa.field( + "quux", + pa.map_( + pa.string(), + pa.map_(pa.string(), pa.int32()), + ), + nullable=False, ), - nullable=False, - ), - ]) + ] + ) visitor = _HasIds() has_ids = visit_pyarrow(schema, visitor) assert not has_ids def test_nested_schema_has_ids() -> None: - schema = pa.schema([ - pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), - pa.field( - "quux", - pa.map_( - pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "7"}), - pa.field( - "value", - pa.map_( - pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "9"}), - pa.field("value", pa.int32(), metadata={"PARQUET:field_id": "10"}), + schema = pa.schema( + [ + pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), + pa.field( + "quux", + pa.map_( + pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "7"}), + pa.field( + "value", + pa.map_( + pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "9"}), + pa.field("value", pa.int32(), metadata={"PARQUET:field_id": "10"}), + ), + nullable=False, + metadata={"PARQUET:field_id": "8"}, ), - nullable=False, - metadata={"PARQUET:field_id": "8"}, ), + nullable=False, + metadata={"PARQUET:field_id": "6", "doc": "quux doc"}, ), - nullable=False, - metadata={"PARQUET:field_id": "6", "doc": "quux doc"}, - ), - ]) + ] + ) visitor = _HasIds() has_ids = visit_pyarrow(schema, visitor) assert has_ids def test_nested_schema_has_partial_missing_ids() -> None: - schema = pa.schema([ - pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), - pa.field( - "quux", - pa.map_( - pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "7"}), - pa.field( - "value", - pa.map_(pa.field("key", pa.string(), nullable=False), pa.field("value", pa.int32())), - nullable=False, + schema = pa.schema( + [ + pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), + pa.field( + "quux", + pa.map_( + pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "7"}), + pa.field( + "value", + pa.map_(pa.field("key", pa.string(), nullable=False), pa.field("value", pa.int32())), + nullable=False, + ), ), + nullable=False, + metadata={"PARQUET:field_id": "6", "doc": "quux doc"}, ), - nullable=False, - metadata={"PARQUET:field_id": "6", "doc": "quux doc"}, - ), - ]) + ] + ) visitor = _HasIds() has_ids = visit_pyarrow(schema, visitor) assert 
not has_ids @@ -441,11 +453,13 @@ def test_simple_pyarrow_schema_to_schema_missing_ids_using_name_mapping( pyarrow_schema_simple_without_ids: pa.Schema, iceberg_schema_simple: Schema ) -> None: schema = pyarrow_schema_simple_without_ids - name_mapping = NameMapping([ - MappedField(field_id=1, names=["foo"]), - MappedField(field_id=2, names=["bar"]), - MappedField(field_id=3, names=["baz"]), - ]) + name_mapping = NameMapping( + [ + MappedField(field_id=1, names=["foo"]), + MappedField(field_id=2, names=["bar"]), + MappedField(field_id=3, names=["baz"]), + ] + ) assert pyarrow_to_schema(schema, name_mapping) == iceberg_schema_simple @@ -454,9 +468,11 @@ def test_simple_pyarrow_schema_to_schema_missing_ids_using_name_mapping_partial_ pyarrow_schema_simple_without_ids: pa.Schema, ) -> None: schema = pyarrow_schema_simple_without_ids - name_mapping = NameMapping([ - MappedField(field_id=1, names=["foo"]), - ]) + name_mapping = NameMapping( + [ + MappedField(field_id=1, names=["foo"]), + ] + ) with pytest.raises(ValueError) as exc_info: _ = pyarrow_to_schema(schema, name_mapping) assert "Could not find field with name: bar" in str(exc_info.value) @@ -467,83 +483,89 @@ def test_nested_pyarrow_schema_to_schema_missing_ids_using_name_mapping( ) -> None: schema = pyarrow_schema_nested_without_ids - name_mapping = NameMapping([ - MappedField(field_id=1, names=["foo"]), - MappedField(field_id=2, names=["bar"]), - MappedField(field_id=3, names=["baz"]), - MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), - MappedField( - field_id=6, - names=["quux"], - fields=[ - MappedField(field_id=7, names=["key"]), - MappedField( - field_id=8, - names=["value"], - fields=[ - MappedField(field_id=9, names=["key"]), - MappedField(field_id=10, names=["value"]), - ], - ), - ], - ), - MappedField( - field_id=11, - names=["location"], - fields=[ - MappedField( - field_id=12, - names=["element"], - fields=[ - MappedField(field_id=13, names=["latitude"]), - MappedField(field_id=14, names=["longitude"]), - ], - ) - ], - ), - MappedField( - field_id=15, - names=["person"], - fields=[ - MappedField(field_id=16, names=["name"]), - MappedField(field_id=17, names=["age"]), - ], - ), - ]) + name_mapping = NameMapping( + [ + MappedField(field_id=1, names=["foo"]), + MappedField(field_id=2, names=["bar"]), + MappedField(field_id=3, names=["baz"]), + MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), + MappedField( + field_id=6, + names=["quux"], + fields=[ + MappedField(field_id=7, names=["key"]), + MappedField( + field_id=8, + names=["value"], + fields=[ + MappedField(field_id=9, names=["key"]), + MappedField(field_id=10, names=["value"]), + ], + ), + ], + ), + MappedField( + field_id=11, + names=["location"], + fields=[ + MappedField( + field_id=12, + names=["element"], + fields=[ + MappedField(field_id=13, names=["latitude"]), + MappedField(field_id=14, names=["longitude"]), + ], + ) + ], + ), + MappedField( + field_id=15, + names=["person"], + fields=[ + MappedField(field_id=16, names=["name"]), + MappedField(field_id=17, names=["age"]), + ], + ), + ] + ) assert pyarrow_to_schema(schema, name_mapping) == iceberg_schema_nested def test_pyarrow_schema_to_schema_missing_ids_using_name_mapping_nested_missing_id() -> None: - schema = pa.schema([ - pa.field("foo", pa.string(), nullable=False), - pa.field( - "quux", - pa.map_( - pa.string(), - pa.map_(pa.string(), pa.int32()), - ), - nullable=False, - ), - ]) - - name_mapping = NameMapping([ - 
MappedField(field_id=1, names=["foo"]), - MappedField( - field_id=6, - names=["quux"], - fields=[ - MappedField(field_id=7, names=["key"]), - MappedField( - field_id=8, - names=["value"], - fields=[ - MappedField(field_id=10, names=["value"]), - ], + schema = pa.schema( + [ + pa.field("foo", pa.string(), nullable=False), + pa.field( + "quux", + pa.map_( + pa.string(), + pa.map_(pa.string(), pa.int32()), ), - ], - ), - ]) + nullable=False, + ), + ] + ) + + name_mapping = NameMapping( + [ + MappedField(field_id=1, names=["foo"]), + MappedField( + field_id=6, + names=["quux"], + fields=[ + MappedField(field_id=7, names=["key"]), + MappedField( + field_id=8, + names=["value"], + fields=[ + MappedField(field_id=10, names=["value"]), + ], + ), + ], + ), + ] + ) with pytest.raises(ValueError) as exc_info: _ = pyarrow_to_schema(schema, name_mapping) assert "Could not find field with name: quux.value.key" in str(exc_info.value) @@ -562,38 +584,44 @@ def test_pyarrow_schema_to_schema_fresh_ids_nested_schema( def test_pyarrow_schema_ensure_large_types(pyarrow_schema_nested_without_ids: pa.Schema) -> None: - expected_schema = pa.schema([ - pa.field("foo", pa.large_string(), nullable=False), - pa.field("bar", pa.int32(), nullable=False), - pa.field("baz", pa.bool_(), nullable=True), - pa.field("qux", pa.large_list(pa.large_string()), nullable=False), - pa.field( - "quux", - pa.map_( - pa.large_string(), - pa.map_(pa.large_string(), pa.int32()), + expected_schema = pa.schema( + [ + pa.field("foo", pa.large_string(), nullable=False), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + pa.field("qux", pa.large_list(pa.large_string()), nullable=False), + pa.field( + "quux", + pa.map_( + pa.large_string(), + pa.map_(pa.large_string(), pa.int32()), + ), + nullable=False, ), - nullable=False, - ), - pa.field( - "location", - pa.large_list( - pa.struct([ - pa.field("latitude", pa.float32(), nullable=False), - pa.field("longitude", pa.float32(), nullable=False), - ]), + pa.field( + "location", + pa.large_list( + pa.struct( + [ + pa.field("latitude", pa.float32(), nullable=False), + pa.field("longitude", pa.float32(), nullable=False), + ] + ), + ), + nullable=False, ), - nullable=False, - ), - pa.field( - "person", - pa.struct([ - pa.field("name", pa.large_string(), nullable=True), - pa.field("age", pa.int32(), nullable=False), - ]), - nullable=True, - ), - ]) + pa.field( + "person", + pa.struct( + [ + pa.field("name", pa.large_string(), nullable=True), + pa.field("age", pa.int32(), nullable=False), + ] + ), + nullable=True, + ), + ] + ) assert _pyarrow_schema_ensure_large_types(pyarrow_schema_nested_without_ids) == expected_schema diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 397fa9f537..bcb2d643dc 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -538,15 +538,15 @@ def test_update_column(table_v1: Table, table_v2: Table) -> None: assert new_schema3.find_field("z").required is False, "failed to update existing field required" # assert the above two updates also works with union_by_name - assert table.update_schema().union_by_name(new_schema)._apply() == new_schema, ( - "failed to update existing field doc with union_by_name" - ) - assert table.update_schema().union_by_name(new_schema2)._apply() == new_schema2, ( - "failed to remove existing field doc with union_by_name" - ) - assert table.update_schema().union_by_name(new_schema3)._apply() == new_schema3, ( - "failed to update existing field required with union_by_name" - 
) + assert ( + table.update_schema().union_by_name(new_schema)._apply() == new_schema + ), "failed to update existing field doc with union_by_name" + assert ( + table.update_schema().union_by_name(new_schema2)._apply() == new_schema2 + ), "failed to remove existing field doc with union_by_name" + assert ( + table.update_schema().union_by_name(new_schema3)._apply() == new_schema3 + ), "failed to update existing field required with union_by_name" def test_add_primitive_type_column(table_v2: Table) -> None: @@ -1077,52 +1077,56 @@ def test_assert_default_sort_order_id(table_v2: Table) -> None: def test_correct_schema() -> None: - table_metadata = TableMetadataV2(**{ - "format-version": 2, - "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", - "location": "s3://bucket/test/location", - "last-sequence-number": 34, - "last-updated-ms": 1602638573590, - "last-column-id": 3, - "current-schema-id": 1, - "schemas": [ - {"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}]}, - { - "type": "struct", - "schema-id": 1, - "identifier-field-ids": [1, 2], - "fields": [ - {"id": 1, "name": "x", "required": True, "type": "long"}, - {"id": 2, "name": "y", "required": True, "type": "long"}, - {"id": 3, "name": "z", "required": True, "type": "long"}, - ], - }, - ], - "default-spec-id": 0, - "partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}], - "last-partition-id": 1000, - "default-sort-order-id": 0, - "sort-orders": [], - "current-snapshot-id": 123, - "snapshots": [ - { - "snapshot-id": 234, - "timestamp-ms": 1515100955770, - "sequence-number": 0, - "summary": {"operation": "append"}, - "manifest-list": "s3://a/b/1.avro", - "schema-id": 10, - }, - { - "snapshot-id": 123, - "timestamp-ms": 1515100955770, - "sequence-number": 0, - "summary": {"operation": "append"}, - "manifest-list": "s3://a/b/1.avro", - "schema-id": 0, - }, - ], - }) + table_metadata = TableMetadataV2( + **{ + "format-version": 2, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1602638573590, + "last-column-id": 3, + "current-schema-id": 1, + "schemas": [ + {"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}]}, + { + "type": "struct", + "schema-id": 1, + "identifier-field-ids": [1, 2], + "fields": [ + {"id": 1, "name": "x", "required": True, "type": "long"}, + {"id": 2, "name": "y", "required": True, "type": "long"}, + {"id": 3, "name": "z", "required": True, "type": "long"}, + ], + }, + ], + "default-spec-id": 0, + "partition-specs": [ + {"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]} + ], + "last-partition-id": 1000, + "default-sort-order-id": 0, + "sort-orders": [], + "current-snapshot-id": 123, + "snapshots": [ + { + "snapshot-id": 234, + "timestamp-ms": 1515100955770, + "sequence-number": 0, + "summary": {"operation": "append"}, + "manifest-list": "s3://a/b/1.avro", + "schema-id": 10, + }, + { + "snapshot-id": 123, + "timestamp-ms": 1515100955770, + "sequence-number": 0, + "summary": {"operation": "append"}, + "manifest-list": "s3://a/b/1.avro", + "schema-id": 0, + }, + ], + } + ) t = Table( identifier=("default", "t1"), diff --git a/tests/table/test_name_mapping.py b/tests/table/test_name_mapping.py index bd271f59f8..c567f3ffb4 100644 --- a/tests/table/test_name_mapping.py +++ b/tests/table/test_name_mapping.py @@ 
-30,49 +30,51 @@ @pytest.fixture(scope="session") def table_name_mapping_nested() -> NameMapping: - return NameMapping([ - MappedField(field_id=1, names=["foo"]), - MappedField(field_id=2, names=["bar"]), - MappedField(field_id=3, names=["baz"]), - MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), - MappedField( - field_id=6, - names=["quux"], - fields=[ - MappedField(field_id=7, names=["key"]), - MappedField( - field_id=8, - names=["value"], - fields=[ - MappedField(field_id=9, names=["key"]), - MappedField(field_id=10, names=["value"]), - ], - ), - ], - ), - MappedField( - field_id=11, - names=["location"], - fields=[ - MappedField( - field_id=12, - names=["element"], - fields=[ - MappedField(field_id=13, names=["latitude"]), - MappedField(field_id=14, names=["longitude"]), - ], - ) - ], - ), - MappedField( - field_id=15, - names=["person"], - fields=[ - MappedField(field_id=16, names=["name"]), - MappedField(field_id=17, names=["age"]), - ], - ), - ]) + return NameMapping( + [ + MappedField(field_id=1, names=["foo"]), + MappedField(field_id=2, names=["bar"]), + MappedField(field_id=3, names=["baz"]), + MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), + MappedField( + field_id=6, + names=["quux"], + fields=[ + MappedField(field_id=7, names=["key"]), + MappedField( + field_id=8, + names=["value"], + fields=[ + MappedField(field_id=9, names=["key"]), + MappedField(field_id=10, names=["value"]), + ], + ), + ], + ), + MappedField( + field_id=11, + names=["location"], + fields=[ + MappedField( + field_id=12, + names=["element"], + fields=[ + MappedField(field_id=13, names=["latitude"]), + MappedField(field_id=14, names=["longitude"]), + ], + ) + ], + ), + MappedField( + field_id=15, + names=["person"], + fields=[ + MappedField(field_id=16, names=["name"]), + MappedField(field_id=17, names=["age"]), + ], + ), + ] + ) def test_json_mapped_field_deserialization() -> None: @@ -165,26 +167,30 @@ def test_json_name_mapping_deserialization() -> None: ] """ - assert parse_mapping_from_json(name_mapping) == NameMapping([ - MappedField(field_id=1, names=["id", "record_id"]), - MappedField(field_id=2, names=["data"]), - MappedField( - names=["location"], - field_id=3, - fields=[ - MappedField(field_id=4, names=["latitude", "lat"]), - MappedField(field_id=5, names=["longitude", "long"]), - ], - ), - ]) + assert parse_mapping_from_json(name_mapping) == NameMapping( + [ + MappedField(field_id=1, names=["id", "record_id"]), + MappedField(field_id=2, names=["data"]), + MappedField( + names=["location"], + field_id=3, + fields=[ + MappedField(field_id=4, names=["latitude", "lat"]), + MappedField(field_id=5, names=["longitude", "long"]), + ], + ), + ] + ) def test_json_mapped_field_no_field_id_serialization() -> None: - table_name_mapping_nested_no_field_id = NameMapping([ - MappedField(field_id=1, names=["foo"]), - MappedField(field_id=None, names=["bar"]), - MappedField(field_id=2, names=["qux"], fields=[MappedField(field_id=None, names=["element"])]), - ]) + table_name_mapping_nested_no_field_id = NameMapping( + [ + MappedField(field_id=1, names=["foo"]), + MappedField(field_id=None, names=["bar"]), + MappedField(field_id=2, names=["qux"], fields=[MappedField(field_id=None, names=["element"])]), + ] + ) assert ( table_name_mapping_nested_no_field_id.model_dump_json() @@ -200,18 +206,20 @@ def test_json_serialization(table_name_mapping_nested: NameMapping) -> None: def test_name_mapping_to_string() -> None: - nm = 
NameMapping([ - MappedField(field_id=1, names=["id", "record_id"]), - MappedField(field_id=2, names=["data"]), - MappedField( - names=["location"], - field_id=3, - fields=[ - MappedField(field_id=4, names=["lat", "latitude"]), - MappedField(field_id=5, names=["long", "longitude"]), - ], - ), - ]) + nm = NameMapping( + [ + MappedField(field_id=1, names=["id", "record_id"]), + MappedField(field_id=2, names=["data"]), + MappedField( + names=["location"], + field_id=3, + fields=[ + MappedField(field_id=4, names=["lat", "latitude"]), + MappedField(field_id=5, names=["long", "longitude"]), + ], + ), + ] + ) assert ( str(nm) @@ -294,51 +302,53 @@ def test_update_mapping(table_name_mapping_nested: NameMapping) -> None: 15: [NestedField(19, "name", StringType(), True), NestedField(20, "add_20", StringType(), True)], } - expected = NameMapping([ - MappedField(field_id=1, names=["foo", "foo_update"]), - MappedField(field_id=2, names=["bar"]), - MappedField(field_id=3, names=["baz"]), - MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), - MappedField( - field_id=6, - names=["quux"], - fields=[ - MappedField(field_id=7, names=["key"]), - MappedField( - field_id=8, - names=["value"], - fields=[ - MappedField(field_id=9, names=["key"]), - MappedField(field_id=10, names=["value"]), - ], - ), - ], - ), - MappedField( - field_id=11, - names=["location"], - fields=[ - MappedField( - field_id=12, - names=["element"], - fields=[ - MappedField(field_id=13, names=["latitude"]), - MappedField(field_id=14, names=["longitude"]), - ], - ) - ], - ), - MappedField( - field_id=15, - names=["person"], - fields=[ - MappedField(field_id=17, names=["age"]), - MappedField(field_id=19, names=["name"]), - MappedField(field_id=20, names=["add_20"]), - ], - ), - MappedField(field_id=18, names=["add_18"]), - ]) + expected = NameMapping( + [ + MappedField(field_id=1, names=["foo", "foo_update"]), + MappedField(field_id=2, names=["bar"]), + MappedField(field_id=3, names=["baz"]), + MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), + MappedField( + field_id=6, + names=["quux"], + fields=[ + MappedField(field_id=7, names=["key"]), + MappedField( + field_id=8, + names=["value"], + fields=[ + MappedField(field_id=9, names=["key"]), + MappedField(field_id=10, names=["value"]), + ], + ), + ], + ), + MappedField( + field_id=11, + names=["location"], + fields=[ + MappedField( + field_id=12, + names=["element"], + fields=[ + MappedField(field_id=13, names=["latitude"]), + MappedField(field_id=14, names=["longitude"]), + ], + ) + ], + ), + MappedField( + field_id=15, + names=["person"], + fields=[ + MappedField(field_id=17, names=["age"]), + MappedField(field_id=19, names=["name"]), + MappedField(field_id=20, names=["add_20"]), + ], + ), + MappedField(field_id=18, names=["add_18"]), + ] + ) assert update_mapping(table_name_mapping_nested, updates, adds) == expected diff --git a/tests/test_schema.py b/tests/test_schema.py index d1fc19df77..daa46dee1f 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -1618,11 +1618,13 @@ def test_append_nested_lists() -> None: def test_union_with_pa_schema(primitive_fields: NestedField) -> None: base_schema = Schema(NestedField(field_id=1, name="foo", field_type=StringType(), required=True)) - pa_schema = pa.schema([ - pa.field("foo", pa.string(), nullable=False), - pa.field("bar", pa.int32(), nullable=True), - pa.field("baz", pa.bool_(), nullable=True), - ]) + pa_schema = pa.schema( + [ + pa.field("foo", 
pa.string(), nullable=False), + pa.field("bar", pa.int32(), nullable=True), + pa.field("baz", pa.bool_(), nullable=True), + ] + ) new_schema = UpdateSchema(transaction=None, schema=base_schema).union_by_name(pa_schema)._apply() # type: ignore @@ -1642,10 +1644,12 @@ def test_arrow_schema() -> None: NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), ) - expected_schema = pa.schema([ - pa.field("foo", pa.large_string(), nullable=False), - pa.field("bar", pa.int32(), nullable=True), - pa.field("baz", pa.bool_(), nullable=True), - ]) + expected_schema = pa.schema( + [ + pa.field("foo", pa.large_string(), nullable=False), + pa.field("bar", pa.int32(), nullable=True), + pa.field("baz", pa.bool_(), nullable=True), + ] + ) assert base_schema.as_arrow() == expected_schema diff --git a/tests/utils/test_manifest.py b/tests/utils/test_manifest.py index 154671c92e..3b1fc6f013 100644 --- a/tests/utils/test_manifest.py +++ b/tests/utils/test_manifest.py @@ -621,9 +621,9 @@ def test_write_manifest_list( def test_file_format_case_insensitive(raw_file_format: str, expected_file_format: FileFormat) -> None: if expected_file_format: parsed_file_format = FileFormat(raw_file_format) - assert parsed_file_format == expected_file_format, ( - f"File format {raw_file_format}: {parsed_file_format} != {expected_file_format}" - ) + assert ( + parsed_file_format == expected_file_format + ), f"File format {raw_file_format}: {parsed_file_format} != {expected_file_format}" else: with pytest.raises(ValueError): _ = FileFormat(raw_file_format) From e5bfa1e49eda103c0808cff1e7c6a489f84982ea Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 6 Jan 2025 01:53:12 -0500 Subject: [PATCH 12/32] Move `mkdocs` to use poetry as `docs` (#1486) * poetry add $(cat mkdocs/requirements.txt | grep -v #) --group dev * add `make docs` * update instructions * strict mode * make docs-build * docs-serve * add comment * add docs as dep group * add make install-poetry --- .github/workflows/python-ci-docs.yml | 10 +- .github/workflows/python-release-docs.yml | 12 +- Makefile | 11 +- mkdocs/README.md | 5 +- mkdocs/requirements.txt | 28 -- poetry.lock | 368 +++++++++++++++++++++- pyproject.toml | 319 +++++++++++++++++++ 7 files changed, 707 insertions(+), 46 deletions(-) delete mode 100644 mkdocs/requirements.txt diff --git a/.github/workflows/python-ci-docs.yml b/.github/workflows/python-ci-docs.yml index 19c4bb6ac1..d6e14c8400 100644 --- a/.github/workflows/python-ci-docs.yml +++ b/.github/workflows/python-ci-docs.yml @@ -36,12 +36,12 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Install poetry + run: make install-poetry - uses: actions/setup-python@v5 with: python-version: 3.12 - name: Install - working-directory: ./mkdocs - run: pip install -r requirements.txt - - name: Build - working-directory: ./mkdocs - run: mkdocs build --strict + run: make docs-install + - name: Build docs + run: make docs-build diff --git a/.github/workflows/python-release-docs.yml b/.github/workflows/python-release-docs.yml index 2f1b1155e9..2823563fe5 100644 --- a/.github/workflows/python-release-docs.yml +++ b/.github/workflows/python-release-docs.yml @@ -31,15 +31,15 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Install poetry + run: make install-poetry - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - - name: Install - working-directory: ./mkdocs - run: pip install -r requirements.txt - - name: Build - working-directory: ./mkdocs - run: mkdocs build --strict + - name: Install docs + run: make 
docs-install + - name: Build docs + run: make docs-build - name: Copy working-directory: ./mkdocs run: mv ./site /tmp/site diff --git a/Makefile b/Makefile index f2bb6f6871..b53a98da61 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ install-poetry: ## Install poetry if the user has not done that yet. echo "Poetry is already installed."; \ fi -install-dependencies: ## Install dependencies including dev and all extras +install-dependencies: ## Install dependencies including dev, docs, and all extras poetry install --all-extras install: | install-poetry install-dependencies @@ -97,3 +97,12 @@ clean: ## Clean up the project Python working environment @find . -name "*.pyd" -exec echo Deleting {} \; -delete @find . -name "*.pyo" -exec echo Deleting {} \; -delete @echo "Cleanup complete" + +docs-install: + poetry install --with docs + +docs-serve: + poetry run mkdocs serve -f mkdocs/mkdocs.yml + +docs-build: + poetry run mkdocs build -f mkdocs/mkdocs.yml --strict diff --git a/mkdocs/README.md b/mkdocs/README.md index e9e0462bee..271025a726 100644 --- a/mkdocs/README.md +++ b/mkdocs/README.md @@ -22,7 +22,6 @@ The pyiceberg docs are stored in `docs/`. ## Running docs locally ```sh -pip3 install -r requirements.txt -mkdocs serve -open http://localhost:8000/ +make docs-install +make docs-serve ``` diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt deleted file mode 100644 index f374b85bea..0000000000 --- a/mkdocs/requirements.txt +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -mkdocs==1.6.1 -griffe==1.5.4 -jinja2==3.1.5 -mkdocstrings==0.27.0 -mkdocstrings-python==1.13.0 -mkdocs-literate-nav==0.6.1 -mkdocs-autorefs==1.2.0 -mkdocs-gen-files==0.5.0 -mkdocs-material==9.5.49 -mkdocs-material-extensions==1.3.1 -mkdocs-section-index==0.3.9 diff --git a/poetry.lock b/poetry.lock index 4fd524bb3f..b1b73746c1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -345,6 +345,20 @@ typing-extensions = ">=4.6.0" [package.extras] aio = ["azure-core[aio] (>=1.30.0)"] +[[package]] +name = "babel" +version = "2.16.0" +description = "Internationalization utilities" +optional = false +python-versions = ">=3.8" +files = [ + {file = "babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b"}, + {file = "babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316"}, +] + +[package.extras] +dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] + [[package]] name = "blinker" version = "1.9.0" @@ -1461,6 +1475,23 @@ ray = ["packaging", "ray[client,data] (>=2.0.0)", "ray[client,data] (>=2.10.0)"] sql = ["connectorx", "sqlalchemy", "sqlglot"] unity = ["unitycatalog"] +[[package]] +name = "ghp-import" +version = "2.1.0" +description = "Copy your docs directly to the gh-pages branch." +optional = false +python-versions = "*" +files = [ + {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"}, + {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"}, +] + +[package.dependencies] +python-dateutil = ">=2.8.1" + +[package.extras] +dev = ["flake8", "markdown", "twine", "wheel"] + [[package]] name = "google-api-core" version = "2.24.0" @@ -1745,6 +1776,20 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] +[[package]] +name = "griffe" +version = "1.5.4" +description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." +optional = false +python-versions = ">=3.9" +files = [ + {file = "griffe-1.5.4-py3-none-any.whl", hash = "sha256:ed33af890586a5bebc842fcb919fc694b3dc1bc55b7d9e0228de41ce566b4a1d"}, + {file = "griffe-1.5.4.tar.gz", hash = "sha256:073e78ad3e10c8378c2f798bd4ef87b92d8411e9916e157fd366a17cc4fd4e52"}, +] + +[package.dependencies] +colorama = ">=0.4" + [[package]] name = "identify" version = "2.6.3" @@ -1896,8 +1941,6 @@ optional = false python-versions = "*" files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, - {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, - {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, ] [package.dependencies] @@ -2012,6 +2055,24 @@ files = [ {file = "lazy_object_proxy-1.10.0-pp310.pp311.pp312.pp38.pp39-none-any.whl", hash = "sha256:80fa48bd89c8f2f456fc0765c11c23bf5af827febacd2f523ca5bc1893fcc09d"}, ] +[[package]] +name = "markdown" +version = "3.7" +description = "Python implementation of John Gruber's Markdown." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803"}, + {file = "markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] +testing = ["coverage", "pyyaml"] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -2117,6 +2178,207 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +[[package]] +name = "mergedeep" +version = "1.3.4" +description = "A deep merge function for 🐍." +optional = false +python-versions = ">=3.6" +files = [ + {file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"}, + {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, +] + +[[package]] +name = "mkdocs" +version = "1.6.1" +description = "Project documentation with Markdown." +optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e"}, + {file = "mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2"}, +] + +[package.dependencies] +click = ">=7.0" +colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""} +ghp-import = ">=1.0" +importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} +jinja2 = ">=2.11.1" +markdown = ">=3.3.6" +markupsafe = ">=2.0.1" +mergedeep = ">=1.3.4" +mkdocs-get-deps = ">=0.2.0" +packaging = ">=20.5" +pathspec = ">=0.11.1" +pyyaml = ">=5.1" +pyyaml-env-tag = ">=0.1" +watchdog = ">=2.0" + +[package.extras] +i18n = ["babel (>=2.9.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.4)", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] + +[[package]] +name = "mkdocs-autorefs" +version = "1.2.0" +description = "Automatically link across pages in MkDocs." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocs_autorefs-1.2.0-py3-none-any.whl", hash = "sha256:d588754ae89bd0ced0c70c06f58566a4ee43471eeeee5202427da7de9ef85a2f"}, + {file = "mkdocs_autorefs-1.2.0.tar.gz", hash = "sha256:a86b93abff653521bda71cf3fc5596342b7a23982093915cb74273f67522190f"}, +] + +[package.dependencies] +Markdown = ">=3.3" +markupsafe = ">=2.0.1" +mkdocs = ">=1.1" + +[[package]] +name = "mkdocs-gen-files" +version = "0.5.0" +description = "MkDocs plugin to programmatically generate documentation pages during the build" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mkdocs_gen_files-0.5.0-py3-none-any.whl", hash = "sha256:7ac060096f3f40bd19039e7277dd3050be9a453c8ac578645844d4d91d7978ea"}, + {file = "mkdocs_gen_files-0.5.0.tar.gz", hash = "sha256:4c7cf256b5d67062a788f6b1d035e157fc1a9498c2399be9af5257d4ff4d19bc"}, +] + +[package.dependencies] +mkdocs = ">=1.0.3" + +[[package]] +name = "mkdocs-get-deps" +version = "0.2.0" +description = "MkDocs extension that lists all dependencies according to a mkdocs.yml file" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocs_get_deps-0.2.0-py3-none-any.whl", hash = "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134"}, + {file = "mkdocs_get_deps-0.2.0.tar.gz", hash = "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=4.3", markers = "python_version < \"3.10\""} +mergedeep = ">=1.3.4" +platformdirs = ">=2.2.0" +pyyaml = ">=5.1" + +[[package]] +name = "mkdocs-literate-nav" +version = "0.6.1" +description = "MkDocs plugin to specify the navigation in Markdown instead of YAML" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mkdocs_literate_nav-0.6.1-py3-none-any.whl", hash = "sha256:e70bdc4a07050d32da79c0b697bd88e9a104cf3294282e9cb20eec94c6b0f401"}, + {file = "mkdocs_literate_nav-0.6.1.tar.gz", hash = "sha256:78a7ab6d878371728acb0cdc6235c9b0ffc6e83c997b037f4a5c6ff7cef7d759"}, +] + +[package.dependencies] +mkdocs = ">=1.0.3" + +[[package]] +name = "mkdocs-material" +version = "9.5.49" +description = "Documentation that simply works" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocs_material-9.5.49-py3-none-any.whl", hash = "sha256:c3c2d8176b18198435d3a3e119011922f3e11424074645c24019c2dcf08a360e"}, + {file = "mkdocs_material-9.5.49.tar.gz", hash = "sha256:3671bb282b4f53a1c72e08adbe04d2481a98f85fed392530051f80ff94a9621d"}, +] + +[package.dependencies] +babel = ">=2.10,<3.0" +colorama = ">=0.4,<1.0" +jinja2 = ">=3.0,<4.0" +markdown = ">=3.2,<4.0" +mkdocs = ">=1.6,<2.0" +mkdocs-material-extensions = ">=1.3,<2.0" +paginate = ">=0.5,<1.0" +pygments = ">=2.16,<3.0" +pymdown-extensions = ">=10.2,<11.0" +regex = ">=2022.4" +requests = ">=2.26,<3.0" + +[package.extras] +git = ["mkdocs-git-committers-plugin-2 (>=1.1,<2.0)", "mkdocs-git-revision-date-localized-plugin (>=1.2.4,<2.0)"] +imaging = ["cairosvg (>=2.6,<3.0)", "pillow (>=10.2,<11.0)"] +recommended = ["mkdocs-minify-plugin (>=0.7,<1.0)", "mkdocs-redirects (>=1.2,<2.0)", "mkdocs-rss-plugin (>=1.6,<2.0)"] + +[[package]] +name = "mkdocs-material-extensions" +version = "1.3.1" +description = "Extension pack for Python Markdown and MkDocs Material." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31"}, + {file = "mkdocs_material_extensions-1.3.1.tar.gz", hash = "sha256:10c9511cea88f568257f960358a467d12b970e1f7b2c0e5fb2bb48cab1928443"}, +] + +[[package]] +name = "mkdocs-section-index" +version = "0.3.9" +description = "MkDocs plugin to allow clickable sections that lead to an index page" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocs_section_index-0.3.9-py3-none-any.whl", hash = "sha256:5e5eb288e8d7984d36c11ead5533f376fdf23498f44e903929d72845b24dfe34"}, + {file = "mkdocs_section_index-0.3.9.tar.gz", hash = "sha256:b66128d19108beceb08b226ee1ba0981840d14baf8a652b6c59e650f3f92e4f8"}, +] + +[package.dependencies] +mkdocs = ">=1.2" + +[[package]] +name = "mkdocstrings" +version = "0.27.0" +description = "Automatic documentation from sources, for MkDocs." +optional = false +python-versions = ">=3.9" +files = [ + {file = "mkdocstrings-0.27.0-py3-none-any.whl", hash = "sha256:6ceaa7ea830770959b55a16203ac63da24badd71325b96af950e59fd37366332"}, + {file = "mkdocstrings-0.27.0.tar.gz", hash = "sha256:16adca6d6b0a1f9e0c07ff0b02ced8e16f228a9d65a37c063ec4c14d7b76a657"}, +] + +[package.dependencies] +click = ">=7.0" +importlib-metadata = {version = ">=4.6", markers = "python_version < \"3.10\""} +Jinja2 = ">=2.11.1" +Markdown = ">=3.6" +MarkupSafe = ">=1.1" +mkdocs = ">=1.4" +mkdocs-autorefs = ">=1.2" +platformdirs = ">=2.2" +pymdown-extensions = ">=6.3" +typing-extensions = {version = ">=4.1", markers = "python_version < \"3.10\""} + +[package.extras] +crystal = ["mkdocstrings-crystal (>=0.3.4)"] +python = ["mkdocstrings-python (>=0.5.2)"] +python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"] + +[[package]] +name = "mkdocstrings-python" +version = "1.13.0" +description = "A Python handler for mkdocstrings." +optional = false +python-versions = ">=3.9" +files = [ + {file = "mkdocstrings_python-1.13.0-py3-none-any.whl", hash = "sha256:b88bbb207bab4086434743849f8e796788b373bd32e7bfefbf8560ac45d88f97"}, + {file = "mkdocstrings_python-1.13.0.tar.gz", hash = "sha256:2dbd5757e8375b9720e81db16f52f1856bf59905428fd7ef88005d1370e2f64c"}, +] + +[package.dependencies] +griffe = ">=0.49" +mkdocs-autorefs = ">=1.2" +mkdocstrings = ">=0.26" + [[package]] name = "mmh3" version = "5.0.1" @@ -2667,6 +2929,21 @@ files = [ {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] +[[package]] +name = "paginate" +version = "0.5.7" +description = "Divides large result sets into pages for easier browsing" +optional = false +python-versions = "*" +files = [ + {file = "paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591"}, + {file = "paginate-0.5.7.tar.gz", hash = "sha256:22bd083ab41e1a8b4f3690544afb2c60c25e5c9a63a30fa2f483f6c60c8e5945"}, +] + +[package.extras] +dev = ["pytest", "tox"] +lint = ["black"] + [[package]] name = "pandas" version = "2.2.3" @@ -2764,6 +3041,17 @@ files = [ {file = "pathable-0.4.3.tar.gz", hash = "sha256:5c869d315be50776cc8a993f3af43e0c60dc01506b399643f919034ebf4cdcab"}, ] +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + [[package]] name = "platformdirs" version = "4.3.6" @@ -3328,6 +3616,24 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"] tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] +[[package]] +name = "pymdown-extensions" +version = "10.13" +description = "Extension pack for Python Markdown." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pymdown_extensions-10.13-py3-none-any.whl", hash = "sha256:80bc33d715eec68e683e04298946d47d78c7739e79d808203df278ee8ef89428"}, + {file = "pymdown_extensions-10.13.tar.gz", hash = "sha256:e0b351494dc0d8d14a1f52b39b1499a00ef1566b4ba23dc74f1eba75c736f5dd"}, +] + +[package.dependencies] +markdown = ">=3.6" +pyyaml = "*" + +[package.extras] +extra = ["pygments (>=2.12)"] + [[package]] name = "pyparsing" version = "3.2.1" @@ -3574,6 +3880,20 @@ files = [ {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +[[package]] +name = "pyyaml-env-tag" +version = "0.1" +description = "A custom YAML tag for referencing environment variables in YAML files. " +optional = false +python-versions = ">=3.6" +files = [ + {file = "pyyaml_env_tag-0.1-py3-none-any.whl", hash = "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069"}, + {file = "pyyaml_env_tag-0.1.tar.gz", hash = "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb"}, +] + +[package.dependencies] +pyyaml = "*" + [[package]] name = "ray" version = "2.40.0" @@ -4384,6 +4704,48 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +[[package]] +name = "watchdog" +version = "6.0.0" +description = "Filesystem events monitoring" +optional = false +python-versions = ">=3.9" +files = [ + {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26"}, + {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112"}, + {file = "watchdog-6.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c897ac1b55c5a1461e16dae288d22bb2e412ba9807df8397a635d88f671d36c3"}, + {file = "watchdog-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6eb11feb5a0d452ee41f824e271ca311a09e250441c262ca2fd7ebcf2461a06c"}, + {file = "watchdog-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ef810fbf7b781a5a593894e4f439773830bdecb885e6880d957d5b9382a960d2"}, + {file = "watchdog-6.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afd0fe1b2270917c5e23c2a65ce50c2a4abb63daafb0d419fde368e272a76b7c"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = 
"sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b"}, + {file = "watchdog-6.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e6f0e77c9417e7cd62af82529b10563db3423625c5fce018430b249bf977f9e8"}, + {file = "watchdog-6.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:90c8e78f3b94014f7aaae121e6b909674df5b46ec24d6bebc45c44c56729af2a"}, + {file = "watchdog-6.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e7631a77ffb1f7d2eefa4445ebbee491c720a5661ddf6df3498ebecae5ed375c"}, + {file = "watchdog-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c7ac31a19f4545dd92fc25d200694098f42c9a8e391bc00bdd362c5736dbf881"}, + {file = "watchdog-6.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9513f27a1a582d9808cf21a07dae516f0fab1cf2d7683a742c498b93eedabb11"}, + {file = "watchdog-6.0.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7a0e56874cfbc4b9b05c60c8a1926fedf56324bb08cfbc188969777940aef3aa"}, + {file = "watchdog-6.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e6439e374fc012255b4ec786ae3c4bc838cd7309a540e5fe0952d03687d8804e"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2"}, + {file = "watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a"}, + {file = "watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680"}, + {file = "watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f"}, + {file = "watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + [[package]] name = "werkzeug" version = "3.1.3" @@ -4734,4 +5096,4 @@ zstandard = ["zstandard"] [metadata] 
lock-version = "2.0" python-versions = "^3.9, !=3.9.7" -content-hash = "2084f03c93f2d1085a5671a171c6cbeb96d9688079270ceca38b0854fe9e0520" +content-hash = "3f9ea520ceb12bb56d371c19ee4c59f14ba258878a65067c37684dfc209f85b9" diff --git a/pyproject.toml b/pyproject.toml index a2737c3f92..66a95a1561 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,6 +95,21 @@ pyspark = "3.5.3" cython = "3.0.11" deptry = ">=0.14,<0.22" docutils = "!=0.21.post1" # https://github.com/python-poetry/poetry/issues/9248#issuecomment-2026240520 + +[tool.poetry.group.docs.dependencies] +# for mkdocs +mkdocs = "1.6.1" +griffe = "1.5.4" +jinja2 = "3.1.5" +mkdocstrings = "0.27.0" +mkdocstrings-python = "1.13.0" +mkdocs-literate-nav = "0.6.1" +mkdocs-autorefs = "1.2.0" +mkdocs-gen-files = "0.5.0" +mkdocs-material = "9.5.49" +mkdocs-material-extensions = "1.3.1" +mkdocs-section-index = "0.3.9" + [[tool.mypy.overrides]] module = "pytest_mock.*" ignore_missing_imports = true @@ -859,6 +874,310 @@ ignore_missing_imports = true module = "tenacity.*" ignore_missing_imports = true +[[tool.mypy.overrides]] +module = "pyarrow.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pandas.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "snappy.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "zstandard.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pydantic.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pydantic_core.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pytest.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "fastavro.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "mmh3.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "hive_metastore.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "thrift.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "requests_mock.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "click.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "rich.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "fsspec.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "s3fs.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "azure.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "adlfs.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "gcsfs.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "packaging.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "tests.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "boto3" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "botocore.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "mypy_boto3_glue.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "moto" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "aiobotocore.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "aiohttp.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "duckdb.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "ray.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "daft.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pyparsing.*" +ignore_missing_imports = true + 
+[[tool.mypy.overrides]] +module = "pyspark.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "strictyaml.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "sortedcontainers.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "sqlalchemy.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "Cython.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "setuptools.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "tenacity.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pyarrow.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pandas.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "snappy.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "zstandard.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pydantic.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pydantic_core.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pytest.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "fastavro.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "mmh3.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "hive_metastore.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "thrift.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "requests_mock.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "click.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "rich.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "fsspec.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "s3fs.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "azure.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "adlfs.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "gcsfs.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "packaging.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "tests.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "boto3" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "botocore.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "mypy_boto3_glue.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "moto" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "aiobotocore.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "aiohttp.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "duckdb.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "ray.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "daft.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pyparsing.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pyspark.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "strictyaml.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "sortedcontainers.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "sqlalchemy.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "Cython.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "setuptools.*" +ignore_missing_imports = true + 
+[[tool.mypy.overrides]] +module = "tenacity.*" +ignore_missing_imports = true + [tool.poetry.scripts] pyiceberg = "pyiceberg.cli.console:run" From 551f524170b12900cfaa3fef1ec8a0f9f437ee4c Mon Sep 17 00:00:00 2001 From: Jiakai Li <50531391+jiakai-li@users.noreply.github.com> Date: Tue, 7 Jan 2025 03:47:43 +1300 Subject: [PATCH 13/32] Fix read from multiple s3 regions (#1453) * Take netloc into account for s3 filesystem when calling `_initialize_fs` * Fix unit test for s3 fileystem * Update ArrowScan to use different FileSystem per file * Add unit test for `PyArrorFileIO.fs_by_scheme` cache behavior * Add error handling * Update tests/io/test_pyarrow.py Co-authored-by: Kevin Liu * Update `s3.region` document and a test case * Add test case for `PyArrowFileIO.new_input` multi region * Shuffle code location for better maintainability * Comment for future integration test * Typo fix * Document wording * Add warning when the bucket region for a file cannot be resolved (for `pyarrow.S3FileSystem`) * Fix code linting * Update mkdocs/docs/configuration.md Co-authored-by: Kevin Liu * Code refactoring * Unit test * Code refactoring * Test cases * Code format * Code tidy-up * Update pyiceberg/io/pyarrow.py Co-authored-by: Kevin Liu --------- Co-authored-by: Kevin Liu --- mkdocs/docs/configuration.md | 30 ++--- pyiceberg/io/pyarrow.py | 212 +++++++++++++++++++++----------- tests/integration/test_reads.py | 29 +++++ tests/io/test_pyarrow.py | 96 ++++++++++++++- 4 files changed, 273 insertions(+), 94 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 621b313613..06eaac1bed 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -102,21 +102,21 @@ For the FileIO there are several configuration options available: -| Key | Example | Description | -|----------------------|----------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| s3.endpoint | | Configure an alternative endpoint of the S3 service for the FileIO to access. This could be used to use S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | -| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. | -| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. | -| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | -| s3.role-session-name | session | An optional identifier for the assumed role session. | -| s3.role-arn | arn:aws:... | AWS Role ARN. If provided instead of access_key and secret_key, temporary credentials will be fetched by assuming this role. | -| s3.signer | bearer | Configure the signature version of the FileIO. | -| s3.signer.uri | | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. | -| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. (default : v1/aws/s3/sign). | -| s3.region | us-west-2 | Sets the region of the bucket | -| s3.proxy-uri | | Configure the proxy server to be used by the FileIO. 
| -| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. | -| s3.force-virtual-addressing | False | Whether to use virtual addressing of buckets. If true, then virtual addressing is always enabled. If false, then virtual addressing is only enabled if endpoint_override is empty. This can be used for non-AWS backends that only support virtual hosted-style access. | +| Key | Example | Description | +|----------------------|----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| s3.endpoint | | Configure an alternative endpoint of the S3 service for the FileIO to access. This could be used to use S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | +| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. | +| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. | +| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | +| s3.role-session-name | session | An optional identifier for the assumed role session. | +| s3.role-arn | arn:aws:... | AWS Role ARN. If provided instead of access_key and secret_key, temporary credentials will be fetched by assuming this role. | +| s3.signer | bearer | Configure the signature version of the FileIO. | +| s3.signer.uri | | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. | +| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. (default : v1/aws/s3/sign). | +| s3.region | us-west-2 | Configure the default region used to initialize an `S3FileSystem`. `PyArrowFileIO` attempts to automatically resolve the region for each S3 bucket, falling back to this value if resolution fails. | +| s3.proxy-uri | | Configure the proxy server to be used by the FileIO. | +| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. | +| s3.force-virtual-addressing | False | Whether to use virtual addressing of buckets. If true, then virtual addressing is always enabled. If false, then virtual addressing is only enabled if endpoint_override is empty. This can be used for non-AWS backends that only support virtual hosted-style access. 
| diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index dc41a7d6a1..ad7e4f4f85 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -351,77 +351,141 @@ def parse_location(location: str) -> Tuple[str, str, str]: return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}" def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSystem: - if scheme in {"s3", "s3a", "s3n", "oss"}: - from pyarrow.fs import S3FileSystem - - client_kwargs: Dict[str, Any] = { - "endpoint_override": self.properties.get(S3_ENDPOINT), - "access_key": get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), - "secret_key": get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), - "session_token": get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN), - "region": get_first_property_value(self.properties, S3_REGION, AWS_REGION), - } - - if proxy_uri := self.properties.get(S3_PROXY_URI): - client_kwargs["proxy_options"] = proxy_uri - - if connect_timeout := self.properties.get(S3_CONNECT_TIMEOUT): - client_kwargs["connect_timeout"] = float(connect_timeout) - - if role_arn := get_first_property_value(self.properties, S3_ROLE_ARN, AWS_ROLE_ARN): - client_kwargs["role_arn"] = role_arn - - if session_name := get_first_property_value(self.properties, S3_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME): - client_kwargs["session_name"] = session_name - - if force_virtual_addressing := self.properties.get(S3_FORCE_VIRTUAL_ADDRESSING): - client_kwargs["force_virtual_addressing"] = property_as_bool(self.properties, force_virtual_addressing, False) - - return S3FileSystem(**client_kwargs) - elif scheme in ("hdfs", "viewfs"): - from pyarrow.fs import HadoopFileSystem - - hdfs_kwargs: Dict[str, Any] = {} - if netloc: - return HadoopFileSystem.from_uri(f"{scheme}://{netloc}") - if host := self.properties.get(HDFS_HOST): - hdfs_kwargs["host"] = host - if port := self.properties.get(HDFS_PORT): - # port should be an integer type - hdfs_kwargs["port"] = int(port) - if user := self.properties.get(HDFS_USER): - hdfs_kwargs["user"] = user - if kerb_ticket := self.properties.get(HDFS_KERB_TICKET): - hdfs_kwargs["kerb_ticket"] = kerb_ticket - - return HadoopFileSystem(**hdfs_kwargs) + """Initialize FileSystem for different scheme.""" + if scheme in {"oss"}: + return self._initialize_oss_fs() + + elif scheme in {"s3", "s3a", "s3n"}: + return self._initialize_s3_fs(netloc) + + elif scheme in {"hdfs", "viewfs"}: + return self._initialize_hdfs_fs(scheme, netloc) + elif scheme in {"gs", "gcs"}: - from pyarrow.fs import GcsFileSystem - - gcs_kwargs: Dict[str, Any] = {} - if access_token := self.properties.get(GCS_TOKEN): - gcs_kwargs["access_token"] = access_token - if expiration := self.properties.get(GCS_TOKEN_EXPIRES_AT_MS): - gcs_kwargs["credential_token_expiration"] = millis_to_datetime(int(expiration)) - if bucket_location := self.properties.get(GCS_DEFAULT_LOCATION): - gcs_kwargs["default_bucket_location"] = bucket_location - if endpoint := get_first_property_value(self.properties, GCS_SERVICE_HOST, GCS_ENDPOINT): - if self.properties.get(GCS_ENDPOINT): - deprecation_message( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead", - ) - url_parts = urlparse(endpoint) - gcs_kwargs["scheme"] = url_parts.scheme - gcs_kwargs["endpoint_override"] = url_parts.netloc + return self._initialize_gcs_fs() + + elif scheme in {"file"}: + return 
self._initialize_local_fs() - return GcsFileSystem(**gcs_kwargs) - elif scheme == "file": - return PyArrowLocalFileSystem() else: raise ValueError(f"Unrecognized filesystem type in URI: {scheme}") + def _initialize_oss_fs(self) -> FileSystem: + from pyarrow.fs import S3FileSystem + + client_kwargs: Dict[str, Any] = { + "endpoint_override": self.properties.get(S3_ENDPOINT), + "access_key": get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), + "secret_key": get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), + "session_token": get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN), + "region": get_first_property_value(self.properties, S3_REGION, AWS_REGION), + } + + if proxy_uri := self.properties.get(S3_PROXY_URI): + client_kwargs["proxy_options"] = proxy_uri + + if connect_timeout := self.properties.get(S3_CONNECT_TIMEOUT): + client_kwargs["connect_timeout"] = float(connect_timeout) + + if role_arn := get_first_property_value(self.properties, S3_ROLE_ARN, AWS_ROLE_ARN): + client_kwargs["role_arn"] = role_arn + + if session_name := get_first_property_value(self.properties, S3_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME): + client_kwargs["session_name"] = session_name + + if force_virtual_addressing := self.properties.get(S3_FORCE_VIRTUAL_ADDRESSING): + client_kwargs["force_virtual_addressing"] = property_as_bool(self.properties, force_virtual_addressing, False) + + return S3FileSystem(**client_kwargs) + + def _initialize_s3_fs(self, netloc: Optional[str]) -> FileSystem: + from pyarrow.fs import S3FileSystem, resolve_s3_region + + # Resolve region from netloc(bucket), fallback to user-provided region + provided_region = get_first_property_value(self.properties, S3_REGION, AWS_REGION) + + try: + bucket_region = resolve_s3_region(bucket=netloc) + except (OSError, TypeError): + bucket_region = None + logger.warning(f"Unable to resolve region for bucket {netloc}, using default region {provided_region}") + + bucket_region = bucket_region or provided_region + if bucket_region != provided_region: + logger.warning( + f"PyArrow FileIO overriding S3 bucket region for bucket {netloc}: " + f"provided region {provided_region}, actual region {bucket_region}" + ) + + client_kwargs: Dict[str, Any] = { + "endpoint_override": self.properties.get(S3_ENDPOINT), + "access_key": get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), + "secret_key": get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), + "session_token": get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN), + "region": bucket_region, + } + + if proxy_uri := self.properties.get(S3_PROXY_URI): + client_kwargs["proxy_options"] = proxy_uri + + if connect_timeout := self.properties.get(S3_CONNECT_TIMEOUT): + client_kwargs["connect_timeout"] = float(connect_timeout) + + if role_arn := get_first_property_value(self.properties, S3_ROLE_ARN, AWS_ROLE_ARN): + client_kwargs["role_arn"] = role_arn + + if session_name := get_first_property_value(self.properties, S3_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME): + client_kwargs["session_name"] = session_name + + if force_virtual_addressing := self.properties.get(S3_FORCE_VIRTUAL_ADDRESSING): + client_kwargs["force_virtual_addressing"] = property_as_bool(self.properties, force_virtual_addressing, False) + + return S3FileSystem(**client_kwargs) + + def _initialize_hdfs_fs(self, scheme: str, netloc: Optional[str]) -> FileSystem: + from pyarrow.fs 
import HadoopFileSystem + + hdfs_kwargs: Dict[str, Any] = {} + if netloc: + return HadoopFileSystem.from_uri(f"{scheme}://{netloc}") + if host := self.properties.get(HDFS_HOST): + hdfs_kwargs["host"] = host + if port := self.properties.get(HDFS_PORT): + # port should be an integer type + hdfs_kwargs["port"] = int(port) + if user := self.properties.get(HDFS_USER): + hdfs_kwargs["user"] = user + if kerb_ticket := self.properties.get(HDFS_KERB_TICKET): + hdfs_kwargs["kerb_ticket"] = kerb_ticket + + return HadoopFileSystem(**hdfs_kwargs) + + def _initialize_gcs_fs(self) -> FileSystem: + from pyarrow.fs import GcsFileSystem + + gcs_kwargs: Dict[str, Any] = {} + if access_token := self.properties.get(GCS_TOKEN): + gcs_kwargs["access_token"] = access_token + if expiration := self.properties.get(GCS_TOKEN_EXPIRES_AT_MS): + gcs_kwargs["credential_token_expiration"] = millis_to_datetime(int(expiration)) + if bucket_location := self.properties.get(GCS_DEFAULT_LOCATION): + gcs_kwargs["default_bucket_location"] = bucket_location + if endpoint := get_first_property_value(self.properties, GCS_SERVICE_HOST, GCS_ENDPOINT): + if self.properties.get(GCS_ENDPOINT): + deprecation_message( + deprecated_in="0.8.0", + removed_in="0.9.0", + help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead", + ) + url_parts = urlparse(endpoint) + gcs_kwargs["scheme"] = url_parts.scheme + gcs_kwargs["endpoint_override"] = url_parts.netloc + + return GcsFileSystem(**gcs_kwargs) + + def _initialize_local_fs(self) -> FileSystem: + return PyArrowLocalFileSystem() + def new_input(self, location: str) -> PyArrowFile: """Get a PyArrowFile instance to read bytes from the file at the given location. @@ -1326,13 +1390,14 @@ def _task_to_table( return None -def _read_all_delete_files(fs: FileSystem, tasks: Iterable[FileScanTask]) -> Dict[str, List[ChunkedArray]]: +def _read_all_delete_files(io: FileIO, tasks: Iterable[FileScanTask]) -> Dict[str, List[ChunkedArray]]: deletes_per_file: Dict[str, List[ChunkedArray]] = {} unique_deletes = set(itertools.chain.from_iterable([task.delete_files for task in tasks])) if len(unique_deletes) > 0: executor = ExecutorFactory.get_or_create() deletes_per_files: Iterator[Dict[str, ChunkedArray]] = executor.map( - lambda args: _read_deletes(*args), [(fs, delete) for delete in unique_deletes] + lambda args: _read_deletes(*args), + [(_fs_from_file_path(io, delete_file.file_path), delete_file) for delete_file in unique_deletes], ) for delete in deletes_per_files: for file, arr in delete.items(): @@ -1344,7 +1409,7 @@ def _read_all_delete_files(fs: FileSystem, tasks: Iterable[FileScanTask]) -> Dic return deletes_per_file -def _fs_from_file_path(file_path: str, io: FileIO) -> FileSystem: +def _fs_from_file_path(io: FileIO, file_path: str) -> FileSystem: scheme, netloc, _ = _parse_location(file_path) if isinstance(io, PyArrowFileIO): return io.fs_by_scheme(scheme, netloc) @@ -1366,7 +1431,6 @@ def _fs_from_file_path(file_path: str, io: FileIO) -> FileSystem: class ArrowScan: _table_metadata: TableMetadata _io: FileIO - _fs: FileSystem _projected_schema: Schema _bound_row_filter: BooleanExpression _case_sensitive: bool @@ -1376,7 +1440,6 @@ class ArrowScan: Attributes: _table_metadata: Current table metadata of the Iceberg table _io: PyIceberg FileIO implementation from which to fetch the io properties - _fs: PyArrow FileSystem to use to read the files _projected_schema: Iceberg Schema to project onto the data files _bound_row_filter: Schema bound row expression to filter 
the data with _case_sensitive: Case sensitivity when looking up column names @@ -1394,7 +1457,6 @@ def __init__( ) -> None: self._table_metadata = table_metadata self._io = io - self._fs = _fs_from_file_path(table_metadata.location, io) # TODO: use different FileSystem per file self._projected_schema = projected_schema self._bound_row_filter = bind(table_metadata.schema(), row_filter, case_sensitive=case_sensitive) self._case_sensitive = case_sensitive @@ -1434,7 +1496,7 @@ def to_table(self, tasks: Iterable[FileScanTask]) -> pa.Table: ResolveError: When a required field cannot be found in the file ValueError: When a field type in the file cannot be projected to the schema type """ - deletes_per_file = _read_all_delete_files(self._fs, tasks) + deletes_per_file = _read_all_delete_files(self._io, tasks) executor = ExecutorFactory.get_or_create() def _table_from_scan_task(task: FileScanTask) -> pa.Table: @@ -1497,7 +1559,7 @@ def to_record_batches(self, tasks: Iterable[FileScanTask]) -> Iterator[pa.Record ResolveError: When a required field cannot be found in the file ValueError: When a field type in the file cannot be projected to the schema type """ - deletes_per_file = _read_all_delete_files(self._fs, tasks) + deletes_per_file = _read_all_delete_files(self._io, tasks) return self._record_batches_from_scan_tasks_and_deletes(tasks, deletes_per_file) def _record_batches_from_scan_tasks_and_deletes( @@ -1508,7 +1570,7 @@ def _record_batches_from_scan_tasks_and_deletes( if self._limit is not None and total_row_count >= self._limit: break batches = _task_to_record_batches( - self._fs, + _fs_from_file_path(self._io, task.file.file_path), task, self._bound_row_filter, self._projected_schema, diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index 8d13724087..f2e79bae60 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -19,6 +19,7 @@ import math import time import uuid +from pathlib import PosixPath from urllib.parse import urlparse import pyarrow as pa @@ -921,3 +922,31 @@ def test_table_scan_empty_table(catalog: Catalog) -> None: result_table = tbl.scan().to_arrow() assert len(result_table) == 0 + + +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +def test_read_from_s3_and_local_fs(catalog: Catalog, tmp_path: PosixPath) -> None: + identifier = "default.test_read_from_s3_and_local_fs" + schema = pa.schema([pa.field("colA", pa.string())]) + arrow_table = pa.Table.from_arrays([pa.array(["one"])], schema=schema) + + tmp_dir = tmp_path / "data" + tmp_dir.mkdir() + local_file = tmp_dir / "local_file.parquet" + + try: + catalog.drop_table(identifier) + except NoSuchTableError: + pass + tbl = catalog.create_table(identifier, schema=schema) + + # Append table to s3 endpoint + tbl.append(arrow_table) + + # Append a local file + pq.write_table(arrow_table, local_file) + tbl.add_files([str(local_file)]) + + result_table = tbl.scan().to_arrow() + assert result_table["colA"].to_pylist() == ["one", "one"] diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py index 8bb97e150a..8beb750f49 100644 --- a/tests/io/test_pyarrow.py +++ b/tests/io/test_pyarrow.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
# pylint: disable=protected-access,unused-argument,redefined-outer-name - +import logging import os import tempfile import uuid @@ -27,7 +27,7 @@ import pyarrow as pa import pyarrow.parquet as pq import pytest -from pyarrow.fs import FileType, LocalFileSystem +from pyarrow.fs import FileType, LocalFileSystem, S3FileSystem from pyiceberg.exceptions import ResolveError from pyiceberg.expressions import ( @@ -360,10 +360,12 @@ def test_pyarrow_s3_session_properties() -> None: **UNIFIED_AWS_SESSION_PROPERTIES, } - with patch("pyarrow.fs.S3FileSystem") as mock_s3fs: + with patch("pyarrow.fs.S3FileSystem") as mock_s3fs, patch("pyarrow.fs.resolve_s3_region") as mock_s3_region_resolver: s3_fileio = PyArrowFileIO(properties=session_properties) filename = str(uuid.uuid4()) + # Mock `resolve_s3_region` to prevent from the location used resolving to a different s3 region + mock_s3_region_resolver.side_effect = OSError("S3 bucket is not found") s3_fileio.new_input(location=f"s3://warehouse/{filename}") mock_s3fs.assert_called_with( @@ -381,10 +383,11 @@ def test_pyarrow_unified_session_properties() -> None: **UNIFIED_AWS_SESSION_PROPERTIES, } - with patch("pyarrow.fs.S3FileSystem") as mock_s3fs: + with patch("pyarrow.fs.S3FileSystem") as mock_s3fs, patch("pyarrow.fs.resolve_s3_region") as mock_s3_region_resolver: s3_fileio = PyArrowFileIO(properties=session_properties) filename = str(uuid.uuid4()) + mock_s3_region_resolver.return_value = "client.region" s3_fileio.new_input(location=f"s3://warehouse/{filename}") mock_s3fs.assert_called_with( @@ -2096,3 +2099,88 @@ def test__to_requested_schema_timestamps_without_downcast_raises_exception( _to_requested_schema(requested_schema, file_schema, batch, downcast_ns_timestamp_to_us=False, include_field_ids=False) assert "Unsupported schema projection from timestamp[ns] to timestamp[us]" in str(exc_info.value) + + +def test_pyarrow_file_io_fs_by_scheme_cache() -> None: + # It's better to set up multi-region minio servers for an integration test once `endpoint_url` argument becomes available for `resolve_s3_region` + # Refer to: https://github.com/apache/arrow/issues/43713 + + pyarrow_file_io = PyArrowFileIO() + us_east_1_region = "us-east-1" + ap_southeast_2_region = "ap-southeast-2" + + with patch("pyarrow.fs.resolve_s3_region") as mock_s3_region_resolver: + # Call with new argument resolves region automatically + mock_s3_region_resolver.return_value = us_east_1_region + filesystem_us = pyarrow_file_io.fs_by_scheme("s3", "us-east-1-bucket") + assert filesystem_us.region == us_east_1_region + assert pyarrow_file_io.fs_by_scheme.cache_info().misses == 1 # type: ignore + assert pyarrow_file_io.fs_by_scheme.cache_info().currsize == 1 # type: ignore + + # Call with different argument also resolves region automatically + mock_s3_region_resolver.return_value = ap_southeast_2_region + filesystem_ap_southeast_2 = pyarrow_file_io.fs_by_scheme("s3", "ap-southeast-2-bucket") + assert filesystem_ap_southeast_2.region == ap_southeast_2_region + assert pyarrow_file_io.fs_by_scheme.cache_info().misses == 2 # type: ignore + assert pyarrow_file_io.fs_by_scheme.cache_info().currsize == 2 # type: ignore + + # Call with same argument hits cache + filesystem_us_cached = pyarrow_file_io.fs_by_scheme("s3", "us-east-1-bucket") + assert filesystem_us_cached.region == us_east_1_region + assert pyarrow_file_io.fs_by_scheme.cache_info().hits == 1 # type: ignore + + # Call with same argument hits cache + filesystem_ap_southeast_2_cached = pyarrow_file_io.fs_by_scheme("s3", 
"ap-southeast-2-bucket") + assert filesystem_ap_southeast_2_cached.region == ap_southeast_2_region + assert pyarrow_file_io.fs_by_scheme.cache_info().hits == 2 # type: ignore + + +def test_pyarrow_io_new_input_multi_region(caplog: Any) -> None: + # It's better to set up multi-region minio servers for an integration test once `endpoint_url` argument becomes available for `resolve_s3_region` + # Refer to: https://github.com/apache/arrow/issues/43713 + user_provided_region = "ap-southeast-1" + bucket_regions = [ + ("us-east-2-bucket", "us-east-2"), + ("ap-southeast-2-bucket", "ap-southeast-2"), + ] + + def _s3_region_map(bucket: str) -> str: + for bucket_region in bucket_regions: + if bucket_region[0] == bucket: + return bucket_region[1] + raise OSError("Unknown bucket") + + # For a pyarrow io instance with configured default s3 region + pyarrow_file_io = PyArrowFileIO({"s3.region": user_provided_region}) + with patch("pyarrow.fs.resolve_s3_region") as mock_s3_region_resolver: + mock_s3_region_resolver.side_effect = _s3_region_map + + # The region is set to provided region if bucket region cannot be resolved + with caplog.at_level(logging.WARNING): + assert pyarrow_file_io.new_input("s3://non-exist-bucket/path/to/file")._filesystem.region == user_provided_region + assert f"Unable to resolve region for bucket non-exist-bucket, using default region {user_provided_region}" in caplog.text + + for bucket_region in bucket_regions: + # For s3 scheme, region is overwritten by resolved bucket region if different from user provided region + with caplog.at_level(logging.WARNING): + assert pyarrow_file_io.new_input(f"s3://{bucket_region[0]}/path/to/file")._filesystem.region == bucket_region[1] + assert ( + f"PyArrow FileIO overriding S3 bucket region for bucket {bucket_region[0]}: " + f"provided region {user_provided_region}, actual region {bucket_region[1]}" in caplog.text + ) + + # For oss scheme, user provided region is used instead + assert pyarrow_file_io.new_input(f"oss://{bucket_region[0]}/path/to/file")._filesystem.region == user_provided_region + + +def test_pyarrow_io_multi_fs() -> None: + pyarrow_file_io = PyArrowFileIO({"s3.region": "ap-southeast-1"}) + + with patch("pyarrow.fs.resolve_s3_region") as mock_s3_region_resolver: + mock_s3_region_resolver.return_value = None + + # The PyArrowFileIO instance resolves s3 file input to S3FileSystem + assert isinstance(pyarrow_file_io.new_input("s3://bucket/path/to/file")._filesystem, S3FileSystem) + + # Same PyArrowFileIO instance resolves local file input to LocalFileSystem + assert isinstance(pyarrow_file_io.new_input("file:///path/to/file")._filesystem, LocalFileSystem) From e39f91a03d652b84c96acbf8ceac29777514344d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 10:33:53 -0500 Subject: [PATCH 14/32] Bump moto from 5.0.25 to 5.0.26 (#1490) Bumps [moto](https://github.com/getmoto/moto) from 5.0.25 to 5.0.26. - [Release notes](https://github.com/getmoto/moto/releases) - [Changelog](https://github.com/getmoto/moto/blob/master/CHANGELOG.md) - [Commits](https://github.com/getmoto/moto/compare/5.0.25...5.0.26) --- updated-dependencies: - dependency-name: moto dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index b1b73746c1..c95252517a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1941,6 +1941,8 @@ optional = false python-versions = "*" files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, + {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, + {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, ] [package.dependencies] @@ -2494,13 +2496,13 @@ type = ["mypy (==1.11.2)"] [[package]] name = "moto" -version = "5.0.25" +version = "5.0.26" description = "A library that allows you to easily mock out tests based on AWS infrastructure" optional = false python-versions = ">=3.8" files = [ - {file = "moto-5.0.25-py3-none-any.whl", hash = "sha256:ab790f9d7d08f30667a196af7cacead03e76c10be2d1148ea00a731d47918a1e"}, - {file = "moto-5.0.25.tar.gz", hash = "sha256:deea8b158cec5a65c9635ae1fff4579d735b11ac8a0e5226fbbeb742ce0ce6b2"}, + {file = "moto-5.0.26-py3-none-any.whl", hash = "sha256:803831f427ca6c0452ae4fb898d731cfc19906466a33a88cbc1076abcbfcbba7"}, + {file = "moto-5.0.26.tar.gz", hash = "sha256:6829f58a670a087e7c5b63f8183c6b72d64a1444e420c212250b7326b69a9183"}, ] [package.dependencies] @@ -3313,6 +3315,7 @@ files = [ {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"}, From 3b580111760f0749922ea593dbe0b1d602952438 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 12:23:06 -0500 Subject: [PATCH 15/32] Build: Bump pytest-checkdocs from 2.10.1 to 2.13.0 (#682) Bumps [pytest-checkdocs](https://github.com/jaraco/pytest-checkdocs) from 2.10.1 to 2.13.0. - [Release notes](https://github.com/jaraco/pytest-checkdocs/releases) - [Changelog](https://github.com/jaraco/pytest-checkdocs/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/pytest-checkdocs/compare/v2.10.1...v2.13.0) --- updated-dependencies: - dependency-name: pytest-checkdocs dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 265 +++++++++++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 2 +- 2 files changed, 259 insertions(+), 8 deletions(-) diff --git a/poetry.lock b/poetry.lock index c95252517a..7bc22bec33 100644 --- a/poetry.lock +++ b/poetry.lock @@ -185,6 +185,17 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "alabaster" +version = "0.7.16" +description = "A light, configurable Sphinx theme" +optional = false +python-versions = ">=3.9" +files = [ + {file = "alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92"}, + {file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"}, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -359,6 +370,21 @@ files = [ [package.extras] dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] +[[package]] +name = "backports-tarfile" +version = "1.2.0" +description = "Backport of CPython tarfile module" +optional = false +python-versions = ">=3.8" +files = [ + {file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"}, + {file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["jaraco.test", "pytest (!=8.0.*)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)"] + [[package]] name = "blinker" version = "1.9.0" @@ -428,6 +454,7 @@ importlib-metadata = {version = ">=4.6", markers = "python_full_version < \"3.10 packaging = ">=19.1" pyproject_hooks = "*" tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +virtualenv = {version = ">=20.0.35", optional = true, markers = "extra == \"virtualenv\""} [package.extras] docs = ["furo (>=2023.08.17)", "sphinx (>=7.0,<8.0)", "sphinx-argparse-cli (>=1.5)", "sphinx-autodoc-typehints (>=1.10)", "sphinx-issues (>=3.0.0)"] @@ -1103,6 +1130,25 @@ files = [ {file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"}, ] +[[package]] +name = "domdf-python-tools" +version = "3.9.0" +description = "Helpful functions for Python 🐍 🛠️" +optional = false +python-versions = ">=3.6" +files = [ + {file = "domdf_python_tools-3.9.0-py3-none-any.whl", hash = "sha256:4e1ef365cbc24627d6d1e90cf7d46d8ab8df967e1237f4a26885f6986c78872e"}, + {file = "domdf_python_tools-3.9.0.tar.gz", hash = "sha256:1f8a96971178333a55e083e35610d7688cd7620ad2b99790164e1fc1a3614c18"}, +] + +[package.dependencies] +natsort = ">=7.0.1" +typing-extensions = ">=3.7.4.1" + +[package.extras] +all = ["pytz (>=2019.1)"] +dates = ["pytz (>=2019.1)"] + [[package]] name = "duckdb" version = "1.1.3" @@ -1818,6 +1864,17 @@ files = [ [package.extras] all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] +[[package]] +name = "imagesize" +version = "1.4.1" +description = "Getting image size from png/jpeg/jpeg2000/gif file" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, + {file = "imagesize-1.4.1.tar.gz", 
hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, +] + [[package]] name = "importlib-metadata" version = "8.5.0" @@ -1874,6 +1931,45 @@ files = [ {file = "itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173"}, ] +[[package]] +name = "jaraco-context" +version = "6.0.1" +description = "Useful decorators and context managers" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jaraco.context-6.0.1-py3-none-any.whl", hash = "sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4"}, + {file = "jaraco_context-6.0.1.tar.gz", hash = "sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3"}, +] + +[package.dependencies] +"backports.tarfile" = {version = "*", markers = "python_version < \"3.12\""} + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +test = ["portend", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)"] + +[[package]] +name = "jaraco-packaging" +version = "10.2.3" +description = "tools to supplement packaging Python releases" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jaraco.packaging-10.2.3-py3-none-any.whl", hash = "sha256:ceb5806d2ac5731ba5b265d196e4cb848afa2a958f01d0bf3a1dfaa3969ed92c"}, + {file = "jaraco_packaging-10.2.3.tar.gz", hash = "sha256:d726cc42faa62b2f70585cbe1176b4b469fe6d75f21b19034b688b4340917933"}, +] + +[package.dependencies] +build = {version = "*", extras = ["virtualenv"]} +domdf-python-tools = "*" +"jaraco.context" = "*" +sphinx = "*" + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +test = ["pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "types-docutils"] + [[package]] name = "jinja2" version = "3.1.5" @@ -2795,6 +2891,21 @@ files = [ [package.dependencies] typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} +[[package]] +name = "natsort" +version = "8.4.0" +description = "Simple yet flexible natural sorting in Python." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"}, + {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"}, +] + +[package.extras] +fast = ["fastnumbers (>=2.0.0)"] +icu = ["PyICU (>=1.0.0)"] + [[package]] name = "networkx" version = "3.2.1" @@ -3706,23 +3817,22 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no [[package]] name = "pytest-checkdocs" -version = "2.10.1" +version = "2.13.0" description = "check the README when running tests" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-checkdocs-2.10.1.tar.gz", hash = "sha256:393868583f2d0314f8c5828fd94f7d28699543f6a0a925356d7e274e2952297e"}, - {file = "pytest_checkdocs-2.10.1-py3-none-any.whl", hash = "sha256:f069d6408633697023298ebf66c9bb1cb915c3ae5f047457b507229a4784e153"}, + {file = "pytest_checkdocs-2.13.0-py3-none-any.whl", hash = "sha256:5df5bbd7e9753aa51a5f6954a301a4066bd4a04eb7e0c712c5d5d7ede1cbe153"}, + {file = "pytest_checkdocs-2.13.0.tar.gz", hash = "sha256:b0e67169c543986142e15afbc17c772da87fcdb0922c7b1e4f6c60f8769f11f9"}, ] [package.dependencies] -build = "*" docutils = ">=0.15" -importlib-metadata = {version = ">=4", markers = "python_version < \"3.10\""} +"jaraco.packaging" = ">=9.5" [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff", "types-docutils"] +testing = ["pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "types-docutils"] [[package]] name = "pytest-lazy-fixture" @@ -4389,6 +4499,17 @@ files = [ {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, ] +[[package]] +name = "snowballstemmer" +version = "2.2.0" +description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." 
+optional = false +python-versions = "*" +files = [ + {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, + {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, +] + [[package]] name = "sortedcontainers" version = "2.4.0" @@ -4400,6 +4521,136 @@ files = [ {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, ] +[[package]] +name = "sphinx" +version = "7.4.7" +description = "Python documentation generator" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinx-7.4.7-py3-none-any.whl", hash = "sha256:c2419e2135d11f1951cd994d6eb18a1835bd8fdd8429f9ca375dc1f3281bd239"}, + {file = "sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe"}, +] + +[package.dependencies] +alabaster = ">=0.7.14,<0.8.0" +babel = ">=2.13" +colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""} +docutils = ">=0.20,<0.22" +imagesize = ">=1.3" +importlib-metadata = {version = ">=6.0", markers = "python_version < \"3.10\""} +Jinja2 = ">=3.1" +packaging = ">=23.0" +Pygments = ">=2.17" +requests = ">=2.30.0" +snowballstemmer = ">=2.2" +sphinxcontrib-applehelp = "*" +sphinxcontrib-devhelp = "*" +sphinxcontrib-htmlhelp = ">=2.0.0" +sphinxcontrib-jsmath = "*" +sphinxcontrib-qthelp = "*" +sphinxcontrib-serializinghtml = ">=1.1.9" +tomli = {version = ">=2", markers = "python_version < \"3.11\""} + +[package.extras] +docs = ["sphinxcontrib-websupport"] +lint = ["flake8 (>=6.0)", "importlib-metadata (>=6.0)", "mypy (==1.10.1)", "pytest (>=6.0)", "ruff (==0.5.2)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-docutils (==0.21.0.20240711)", "types-requests (>=2.30.0)"] +test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "2.0.0" +description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5"}, + {file = "sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1"}, +] + +[package.extras] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "2.0.0" +description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp documents" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2"}, + {file = "sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad"}, +] + +[package.extras] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.1.0" +description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = 
"sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8"}, + {file = "sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9"}, +] + +[package.extras] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] +standalone = ["Sphinx (>=5)"] +test = ["html5lib", "pytest"] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +description = "A sphinx extension which renders display math in HTML via JavaScript" +optional = false +python-versions = ">=3.5" +files = [ + {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, + {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, +] + +[package.extras] +test = ["flake8", "mypy", "pytest"] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "2.0.0" +description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp documents" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb"}, + {file = "sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab"}, +] + +[package.extras] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] +standalone = ["Sphinx (>=5)"] +test = ["defusedxml (>=0.7.1)", "pytest"] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "2.0.0" +description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331"}, + {file = "sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d"}, +] + +[package.extras] +lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + [[package]] name = "sqlalchemy" version = "2.0.36" @@ -5099,4 +5350,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.9, !=3.9.7" -content-hash = "3f9ea520ceb12bb56d371c19ee4c59f14ba258878a65067c37684dfc209f85b9" +content-hash = "59e5678cd718f658c5bd099c03051564ee60f991e5f222bf92da13d1dd025a42" diff --git a/pyproject.toml b/pyproject.toml index 66a95a1561..58dac055ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,7 @@ cachetools = "^5.5.0" [tool.poetry.group.dev.dependencies] pytest = "7.4.4" -pytest-checkdocs = "2.10.1" +pytest-checkdocs = "2.13.0" pytest-lazy-fixture = "0.6.3" pre-commit = "4.0.1" fastavro = "1.10.0" From e6af50eaa09b3a8437e0a65c02a4637105503305 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:55:51 -0500 Subject: [PATCH 16/32] Build: Bump boto3 from 1.35.88 to 1.35.93 (#1495) Bumps [boto3](https://github.com/boto/boto3) from 1.35.88 to 1.35.93. - [Release notes](https://github.com/boto/boto3/releases) - [Commits](https://github.com/boto/boto3/compare/1.35.88...1.35.93) --- updated-dependencies: - dependency-name: boto3 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7bc22bec33..c96050b0df 100644 --- a/poetry.lock +++ b/poetry.lock @@ -25,24 +25,31 @@ tests = ["arrow", "dask[dataframe]", "docker", "pytest", "pytest-mock"] [[package]] name = "aiobotocore" -version = "2.16.1" +version = "2.17.0" description = "Async client for aws services using botocore and aiohttp" optional = true python-versions = ">=3.8" files = [ - {file = "aiobotocore-2.16.1-py3-none-any.whl", hash = "sha256:e7cf6295471224c82a111deaf31c2c3a4bcd6dbd6973e75c7fc4739fcccd5b0b"}, - {file = "aiobotocore-2.16.1.tar.gz", hash = "sha256:0f94904c6a1d14d5aac0502fcc1d721b95ee60d46d8a0e546f6203de0410d522"}, + {file = "aiobotocore-2.17.0-py3-none-any.whl", hash = "sha256:aedccd5368a64401233ef9f27983d3d3cb6a507a6ca981f5ec1df014c00e260e"}, + {file = "aiobotocore-2.17.0.tar.gz", hash = "sha256:a3041333c565bff9d63b4468bee4944f2d81cff63a45b10e5cc652f3837f9cc2"}, ] [package.dependencies] aiohttp = ">=3.9.2,<4.0.0" aioitertools = ">=0.5.1,<1.0.0" -botocore = ">=1.35.74,<1.35.89" +botocore = ">=1.35.74,<1.35.94" +jmespath = ">=0.7.1,<2.0.0" +multidict = ">=6.0.0,<7.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = [ + {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, + {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}, +] wrapt = ">=1.10.10,<2.0.0" [package.extras] -awscli = ["awscli (>=1.36.15,<1.36.30)"] -boto3 = ["boto3 (>=1.35.74,<1.35.89)"] +awscli = ["awscli (>=1.36.15,<1.36.35)"] +boto3 = ["boto3 (>=1.35.74,<1.35.94)"] [[package]] name = "aiohappyeyeballs" @@ -398,17 +405,17 @@ files = [ [[package]] name = "boto3" -version = "1.35.88" +version = "1.35.93" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.35.88-py3-none-any.whl", hash = "sha256:7bc9b27ad87607256470c70a86c8b8c319ddd6ecae89cc191687cbf8ccb7b6a6"}, - {file = "boto3-1.35.88.tar.gz", hash = "sha256:43c6a7a70bb226770a82a601870136e3bb3bf2808f4576ab5b9d7d140dbf1323"}, + {file = "boto3-1.35.93-py3-none-any.whl", hash = "sha256:7de2c44c960e486f3c57e5203ea6393c6c4f0914c5f81c789ceb8b5d2ba5d1c5"}, + {file = "boto3-1.35.93.tar.gz", hash = "sha256:2446e819cf4e295833474cdcf2c92bc82718ce537e9ee1f17f7e3d237f60e69b"}, ] [package.dependencies] -botocore = ">=1.35.88,<1.36.0" +botocore = ">=1.35.93,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -417,13 +424,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.88" +version = "1.35.93" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.35.88-py3-none-any.whl", hash = "sha256:e60cc3fbe8d7a10f70e7e852d76be2b29f23ead418a5899d366ea32b1eacb5a5"}, - {file = "botocore-1.35.88.tar.gz", hash = "sha256:58dcd9a464c354b8c6c25261d8de830d175d9739eae568bf0c52e57116fb03c6"}, + {file = "botocore-1.35.93-py3-none-any.whl", hash = "sha256:47f7161000af6036f806449e3de12acdd3ec11aac7f5578e43e96241413a0f8f"}, + {file = "botocore-1.35.93.tar.gz", hash = "sha256:b8d245a01e7d64c41edcf75a42be158df57b9518a83a3dbf5c7e4b8c2bc540cc"}, ] [package.dependencies] From c9249c330e47e05a52284124ff8172bcc232c737 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:55:59 -0500 Subject: [PATCH 17/32] Build: Bump mypy-boto3-glue from 1.35.87 to 1.35.93 (#1496) Bumps [mypy-boto3-glue](https://github.com/youtype/mypy_boto3_builder) from 1.35.87 to 1.35.93. - [Release notes](https://github.com/youtype/mypy_boto3_builder/releases) - [Commits](https://github.com/youtype/mypy_boto3_builder/commits) --- updated-dependencies: - dependency-name: mypy-boto3-glue dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index c96050b0df..684d304bba 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2886,17 +2886,17 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""} [[package]] name = "mypy-boto3-glue" -version = "1.35.87" -description = "Type annotations for boto3 Glue 1.35.87 service generated with mypy-boto3-builder 8.7.0" +version = "1.35.93" +description = "Type annotations for boto3 Glue 1.35.93 service generated with mypy-boto3-builder 8.8.0" optional = true python-versions = ">=3.8" files = [ - {file = "mypy_boto3_glue-1.35.87-py3-none-any.whl", hash = "sha256:c4c62daf80e99ad539491b63814b7cf94a5e4f1fca732540a9aaae458af52691"}, - {file = "mypy_boto3_glue-1.35.87.tar.gz", hash = "sha256:d1d5f1bb5c5297045a1a650a6672c46a319e3cf373085d2303c2179dc5b46d7d"}, + {file = "mypy_boto3_glue-1.35.93-py3-none-any.whl", hash = "sha256:cf46553f68048124bad65345b593ec5ba3806bd9bd15a1d7516d0cb3d79a0652"}, + {file = "mypy_boto3_glue-1.35.93.tar.gz", hash = "sha256:27759a83ffa5414b2589da83625816a3c7cb97600fec68578bd3012a9ae20ee8"}, ] [package.dependencies] -typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} +typing-extensions = {version = "*", markers = "python_version < \"3.12\""} [[package]] name = "natsort" From a95f9ee6e231104319c01493cb3ada59d9e782d0 Mon Sep 17 00:00:00 2001 From: jeppe-dos Date: Thu, 9 Jan 2025 19:14:22 +0100 Subject: [PATCH 18/32] Change dot notation in add column documentation to tuple (#1433) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Change dot notation in add column documentation to tuple * Update move and rename column struct in api.md * Correct rename_column, move_before and delete_column in api.md * Change exchange to processed by on rename_column in api.md * Update mkdocs/docs/api.md Co-authored-by: Kevin Liu * Fix rename column in api.md * Update mkdocs/docs/api.md * Update mkdocs/docs/api.md --------- Co-authored-by: Jeppe Finne Sørensen Co-authored-by: Kevin Liu --- mkdocs/docs/api.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 
deletions(-) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 9c48718877..8b106c1034 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -1072,8 +1072,12 @@ Using `add_column` you can add a column, without having to worry about the field with table.update_schema() as update: update.add_column("retries", IntegerType(), "Number of retries to place the bid") # In a struct - update.add_column("details.confirmed_by", StringType(), "Name of the exchange") + update.add_column("details", StructType()) + +with table.update_schema() as update: + update.add_column(("details", "confirmed_by"), StringType(), "Name of the exchange") ``` +A complex type must exist before columns can be added to it. Fields in complex types are added in a tuple. ### Rename column @@ -1082,20 +1086,21 @@ Renaming a field in an Iceberg table is simple: ```python with table.update_schema() as update: update.rename_column("retries", "num_retries") - # This will rename `confirmed_by` to `exchange` - update.rename_column("properties.confirmed_by", "exchange") + # This will rename `confirmed_by` to `processed_by` in the `details` struct + update.rename_column(("details", "confirmed_by"), "processed_by") ``` ### Move column -Move a field inside of struct: +Move order of fields: ```python with table.update_schema() as update: update.move_first("symbol") + # This will move `bid` after `ask` update.move_after("bid", "ask") - # This will move `confirmed_by` before `exchange` - update.move_before("details.created_by", "details.exchange") + # This will move `confirmed_by` before `exchange` in the `details` struct + update.move_before(("details", "confirmed_by"), ("details", "exchange")) ``` ### Update column @@ -1127,6 +1132,8 @@ Delete a field, careful this is a incompatible change (readers/writers might exp ```python with table.update_schema(allow_incompatible_changes=True) as update: update.delete_column("some_field") + # In a struct + update.delete_column(("details", "confirmed_by")) ``` ## Partition evolution From 19ad24ef7d32485701c4baf85565a6f3614839ff Mon Sep 17 00:00:00 2001 From: smaheshwar-pltr Date: Fri, 10 Jan 2025 20:43:28 +0000 Subject: [PATCH 19/32] Nit fixes to URL-encoding of partition field names (#1499) * Revert "Add `make_name_compatible` suggestion so test passes" This reverts commit 61cdd08c59f3f1d3119b5f907eb09dbbcf80b8c2. * Nit fixes to URL-encoding of partition field names * Fix tests * Collapse * Make lint --------- Co-authored-by: Sreesh Maheshwar --- mkdocs/docs/api.md | 1 + pyiceberg/partitioning.py | 7 +--- tests/integration/test_partitioning_key.py | 47 ++-------------------- 3 files changed, 7 insertions(+), 48 deletions(-) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 8b106c1034..f1ef69b9cb 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -1077,6 +1077,7 @@ with table.update_schema() as update: with table.update_schema() as update: update.add_column(("details", "confirmed_by"), StringType(), "Name of the exchange") ``` + A complex type must exist before columns can be added to it. Fields in complex types are added in a tuple. 
### Rename column diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py index c9b6316f59..1813772217 100644 --- a/pyiceberg/partitioning.py +++ b/pyiceberg/partitioning.py @@ -234,11 +234,8 @@ def partition_to_path(self, data: Record, schema: Schema) -> str: partition_field = self.fields[pos] value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=data[pos]) - value_str = quote_plus(value_str, safe="") - value_strs.append(value_str) - - field_str = quote_plus(partition_field.name, safe="") - field_strs.append(field_str) + value_strs.append(quote_plus(value_str, safe="")) + field_strs.append(quote_plus(partition_field.name, safe="")) path = "/".join([field_str + "=" + value_str for field_str, value_str in zip(field_strs, value_strs)]) return path diff --git a/tests/integration/test_partitioning_key.py b/tests/integration/test_partitioning_key.py index 1ac808c7d0..3955259d33 100644 --- a/tests/integration/test_partitioning_key.py +++ b/tests/integration/test_partitioning_key.py @@ -18,7 +18,7 @@ import uuid from datetime import date, datetime, timedelta, timezone from decimal import Decimal -from typing import Any, Callable, List, Optional +from typing import Any, List import pytest from pyspark.sql import SparkSession @@ -26,7 +26,7 @@ from pyiceberg.catalog import Catalog from pyiceberg.partitioning import PartitionField, PartitionFieldValue, PartitionKey, PartitionSpec -from pyiceberg.schema import Schema +from pyiceberg.schema import Schema, make_compatible_name from pyiceberg.transforms import ( BucketTransform, DayTransform, @@ -78,7 +78,7 @@ @pytest.mark.parametrize( - "partition_fields, partition_values, expected_partition_record, expected_hive_partition_path_slice, spark_create_table_sql_for_justification, spark_data_insert_sql_for_justification, make_compatible_name", + "partition_fields, partition_values, expected_partition_record, expected_hive_partition_path_slice, spark_create_table_sql_for_justification, spark_data_insert_sql_for_justification", [ # # Identity Transform ( @@ -99,7 +99,6 @@ VALUES (false, 'Boolean field set to false'); """, - None, ), ( [PartitionField(source_id=2, field_id=1001, transform=IdentityTransform(), name="string_field")], @@ -119,7 +118,6 @@ VALUES ('sample_string', 'Another string value') """, - None, ), ( [PartitionField(source_id=4, field_id=1001, transform=IdentityTransform(), name="int_field")], @@ -139,7 +137,6 @@ VALUES (42, 'Associated string value for int 42') """, - None, ), ( [PartitionField(source_id=5, field_id=1001, transform=IdentityTransform(), name="long_field")], @@ -159,7 +156,6 @@ VALUES (1234567890123456789, 'Associated string value for long 1234567890123456789') """, - None, ), ( [PartitionField(source_id=6, field_id=1001, transform=IdentityTransform(), name="float_field")], @@ -183,7 +179,6 @@ # VALUES # (3.14, 'Associated string value for float 3.14') # """ - None, ), ( [PartitionField(source_id=7, field_id=1001, transform=IdentityTransform(), name="double_field")], @@ -207,7 +202,6 @@ # VALUES # (6.282, 'Associated string value for double 6.282') # """ - None, ), ( [PartitionField(source_id=8, field_id=1001, transform=IdentityTransform(), name="timestamp_field")], @@ -227,7 +221,6 @@ VALUES (CAST('2023-01-01 12:00:01.000999' AS TIMESTAMP_NTZ), 'Associated string value for timestamp 2023-01-01T12:00:00') """, - None, ), ( [PartitionField(source_id=8, field_id=1001, transform=IdentityTransform(), name="timestamp_field")], @@ -247,7 +240,6 @@ VALUES (CAST('2023-01-01 12:00:01' AS 
TIMESTAMP_NTZ), 'Associated string value for timestamp 2023-01-01T12:00:00') """, - None, ), ( [PartitionField(source_id=8, field_id=1001, transform=IdentityTransform(), name="timestamp_field")], @@ -272,7 +264,6 @@ # VALUES # (CAST('2023-01-01 12:00:00' AS TIMESTAMP_NTZ), 'Associated string value for timestamp 2023-01-01T12:00:00') # """ - None, ), ( [PartitionField(source_id=9, field_id=1001, transform=IdentityTransform(), name="timestamptz_field")], @@ -297,7 +288,6 @@ # VALUES # (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Associated string value for timestamp 2023-01-01 12:00:01.000999+03:00') # """ - None, ), ( [PartitionField(source_id=10, field_id=1001, transform=IdentityTransform(), name="date_field")], @@ -317,7 +307,6 @@ VALUES (CAST('2023-01-01' AS DATE), 'Associated string value for date 2023-01-01') """, - None, ), ( [PartitionField(source_id=14, field_id=1001, transform=IdentityTransform(), name="uuid_field")], @@ -337,7 +326,6 @@ VALUES ('f47ac10b-58cc-4372-a567-0e02b2c3d479', 'Associated string value for UUID f47ac10b-58cc-4372-a567-0e02b2c3d479') """, - None, ), ( [PartitionField(source_id=11, field_id=1001, transform=IdentityTransform(), name="binary_field")], @@ -357,7 +345,6 @@ VALUES (CAST('example' AS BINARY), 'Associated string value for binary `example`') """, - None, ), ( [PartitionField(source_id=13, field_id=1001, transform=IdentityTransform(), name="decimal_field")], @@ -377,7 +364,6 @@ VALUES (123.45, 'Associated string value for decimal 123.45') """, - None, ), # # Year Month Day Hour Transform # Month Transform @@ -399,7 +385,6 @@ VALUES (CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP_NTZ), 'Event at 2023-01-01 11:55:59.999999'); """, - None, ), ( [PartitionField(source_id=9, field_id=1001, transform=MonthTransform(), name="timestamptz_field_month")], @@ -419,7 +404,6 @@ VALUES (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Event at 2023-01-01 12:00:01.000999+03:00'); """, - None, ), ( [PartitionField(source_id=10, field_id=1001, transform=MonthTransform(), name="date_field_month")], @@ -439,7 +423,6 @@ VALUES (CAST('2023-01-01' AS DATE), 'Event on 2023-01-01'); """, - None, ), # Year Transform ( @@ -460,7 +443,6 @@ VALUES (CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP), 'Event at 2023-01-01 11:55:59.999999'); """, - None, ), ( [PartitionField(source_id=9, field_id=1001, transform=YearTransform(), name="timestamptz_field_year")], @@ -480,7 +462,6 @@ VALUES (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Event at 2023-01-01 12:00:01.000999+03:00'); """, - None, ), ( [PartitionField(source_id=10, field_id=1001, transform=YearTransform(), name="date_field_year")], @@ -500,7 +481,6 @@ VALUES (CAST('2023-01-01' AS DATE), 'Event on 2023-01-01'); """, - None, ), # # Day Transform ( @@ -521,7 +501,6 @@ VALUES (CAST('2023-01-01' AS DATE), 'Event on 2023-01-01'); """, - None, ), ( [PartitionField(source_id=9, field_id=1001, transform=DayTransform(), name="timestamptz_field_day")], @@ -541,7 +520,6 @@ VALUES (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Event at 2023-01-01 12:00:01.000999+03:00'); """, - None, ), ( [PartitionField(source_id=10, field_id=1001, transform=DayTransform(), name="date_field_day")], @@ -561,7 +539,6 @@ VALUES (CAST('2023-01-01' AS DATE), 'Event on 2023-01-01'); """, - None, ), # Hour Transform ( @@ -582,7 +559,6 @@ VALUES (CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP), 'Event within the 11th hour of 2023-01-01'); """, - None, ), ( [PartitionField(source_id=9, field_id=1001, 
transform=HourTransform(), name="timestamptz_field_hour")], @@ -602,7 +578,6 @@ VALUES (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Event at 2023-01-01 12:00:01.000999+03:00'); """, - None, ), # Truncate Transform ( @@ -623,7 +598,6 @@ VALUES (12345, 'Sample data for int'); """, - None, ), ( [PartitionField(source_id=5, field_id=1001, transform=TruncateTransform(2), name="bigint_field_trunc")], @@ -643,7 +617,6 @@ VALUES (4294967297, 'Sample data for long'); """, - None, ), ( [PartitionField(source_id=2, field_id=1001, transform=TruncateTransform(3), name="string_field_trunc")], @@ -663,7 +636,6 @@ VALUES ('abcdefg', 'Another sample for string'); """, - None, ), ( [PartitionField(source_id=13, field_id=1001, transform=TruncateTransform(width=5), name="decimal_field_trunc")], @@ -683,7 +655,6 @@ VALUES (678.90, 'Associated string value for decimal 678.90') """, - None, ), ( [PartitionField(source_id=11, field_id=1001, transform=TruncateTransform(10), name="binary_field_trunc")], @@ -703,7 +674,6 @@ VALUES (binary('HELLOICEBERG'), 'Sample data for binary'); """, - None, ), # Bucket Transform ( @@ -724,7 +694,6 @@ VALUES (10, 'Integer with value 10'); """, - None, ), # Test multiple field combinations could generate the Partition record and hive partition path correctly ( @@ -753,7 +722,6 @@ VALUES (CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP), CAST('2023-01-01' AS DATE), 'some data'); """, - None, ), # Test that special characters are URL-encoded ( @@ -773,7 +741,6 @@ VALUES ('special string') """, - lambda name: name.replace("#", "_x23").replace("+", "_x2B"), ), ], ) @@ -787,7 +754,6 @@ def test_partition_key( expected_hive_partition_path_slice: str, spark_create_table_sql_for_justification: str, spark_data_insert_sql_for_justification: str, - make_compatible_name: Optional[Callable[[str], str]], ) -> None: partition_field_values = [PartitionFieldValue(field, value) for field, value in zip(partition_fields, partition_values)] spec = PartitionSpec(*partition_fields) @@ -823,11 +789,6 @@ def test_partition_key( snapshot.manifests(iceberg_table.io)[0].fetch_manifest_entry(iceberg_table.io)[0].data_file.file_path ) # Special characters in partition value are sanitized when written to the data file's partition field - # Use `make_compatible_name` to match the sanitize behavior - sanitized_record = ( - Record(**{make_compatible_name(k): v for k, v in vars(expected_partition_record).items()}) - if make_compatible_name - else expected_partition_record - ) + sanitized_record = Record(**{make_compatible_name(k): v for k, v in vars(expected_partition_record).items()}) assert spark_partition_for_justification == sanitized_record assert expected_hive_partition_path_slice in spark_path_for_justification From ae272b5b37a3132932548d06fb9e8acd23f2bc57 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 10 Jan 2025 16:01:36 -0500 Subject: [PATCH 20/32] bump version to 0.9.0 (#1489) * bump to 0.8.1 * bump to 0.9.0 --- pyiceberg/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiceberg/__init__.py b/pyiceberg/__init__.py index 42c6e12f1b..e97de9276f 100644 --- a/pyiceberg/__init__.py +++ b/pyiceberg/__init__.py @@ -15,4 +15,4 @@ # specific language governing permissions and limitations # under the License. -__version__ = "0.8.0" +__version__ = "0.9.0" diff --git a/pyproject.toml b/pyproject.toml index 58dac055ca..56be937305 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ # under the License. 
[tool.poetry] name = "pyiceberg" -version = "0.8.0" +version = "0.9.0" readme = "README.md" homepage = "https://py.iceberg.apache.org/" repository = "https://github.com/apache/iceberg-python" From d9c5d6b4adf8c300ca47e3ac32cbe41c41f0bbd3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 10 Jan 2025 17:06:37 -0500 Subject: [PATCH 21/32] Build: Bump pydantic from 2.10.4 to 2.10.5 (#1504) Bumps [pydantic](https://github.com/pydantic/pydantic) from 2.10.4 to 2.10.5. - [Release notes](https://github.com/pydantic/pydantic/releases) - [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md) - [Commits](https://github.com/pydantic/pydantic/compare/v2.10.4...v2.10.5) --- updated-dependencies: - dependency-name: pydantic dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 684d304bba..156595db29 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3573,13 +3573,13 @@ files = [ [[package]] name = "pydantic" -version = "2.10.4" +version = "2.10.5" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.10.4-py3-none-any.whl", hash = "sha256:597e135ea68be3a37552fb524bc7d0d66dcf93d395acd93a00682f1efcb8ee3d"}, - {file = "pydantic-2.10.4.tar.gz", hash = "sha256:82f12e9723da6de4fe2ba888b5971157b3be7ad914267dea8f05f82b28254f06"}, + {file = "pydantic-2.10.5-py3-none-any.whl", hash = "sha256:4dd4e322dbe55472cb7ca7e73f4b63574eecccf2835ffa2af9021ce113c83c53"}, + {file = "pydantic-2.10.5.tar.gz", hash = "sha256:278b38dbbaec562011d659ee05f63346951b3a248a6f3642e1bc68894ea2b4ff"}, ] [package.dependencies] From 52665512466c50c5fa62d026f8d7436b63bafcb9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 10 Jan 2025 17:06:51 -0500 Subject: [PATCH 22/32] Build: Bump getdaft from 0.4.1 to 0.4.2 (#1503) Bumps [getdaft](https://github.com/Eventual-Inc/Daft) from 0.4.1 to 0.4.2. - [Release notes](https://github.com/Eventual-Inc/Daft/releases) - [Commits](https://github.com/Eventual-Inc/Daft/compare/v0.4.1...v0.4.2) --- updated-dependencies: - dependency-name: getdaft dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index 156595db29..2c1ace347e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1496,17 +1496,17 @@ gcsfuse = ["fusepy"] [[package]] name = "getdaft" -version = "0.4.1" +version = "0.4.2" description = "Distributed Dataframes for Multimodal Data" optional = true python-versions = ">=3.9" files = [ - {file = "getdaft-0.4.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:04b91c019be87415138edfa61c379174a49760c4474c60eb37b1c24ae010a7d5"}, - {file = "getdaft-0.4.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:6254f33b5292b3198b6a0e4fdd0d2f568ff624930203d9af75bbc3b7e40e8c0b"}, - {file = "getdaft-0.4.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a642f786175f543cb0d2dc585577c554b135f5ac2e7b34bfbe359dd86adbdbae"}, - {file = "getdaft-0.4.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c1e1b0c283e0efc5102dea04db9a98bad6bcf36829a6c3d6cd511e8805514c0"}, - {file = "getdaft-0.4.1-cp39-abi3-win_amd64.whl", hash = "sha256:46985b2ec980134b97d3b8e95becd2b654cb74e2952d7b24b6f3b55d28d16de2"}, - {file = "getdaft-0.4.1.tar.gz", hash = "sha256:d3ad8b11b06bbf25b62a091444917593933ff53c39fb4a8abca8cbc6dde3b917"}, + {file = "getdaft-0.4.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3760e69e66e571dbb42ad354954bd52d3ce8eafdfc93c9bdaf2c1ed42017808e"}, + {file = "getdaft-0.4.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:2b1c072f69663b87e4f3aa926cf7441d1d150fe46a6d2b32c8b01f72a237680b"}, + {file = "getdaft-0.4.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0e6450fd90743bd981575dc3a1b6694fe1e4a9fe2fc31ea5ad1ca92e1dabef2"}, + {file = "getdaft-0.4.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0852c71f81e1ff4fffd60ee7542ff325d1e93ec857adff8c26494a0188dc79ae"}, + {file = "getdaft-0.4.2-cp39-abi3-win_amd64.whl", hash = "sha256:687031e101dd4df151f387cc8a2a60bfc6bda640d4deb2d3a74a4f742eb57edf"}, + {file = "getdaft-0.4.2.tar.gz", hash = "sha256:9d253a5dce0ee798be9737ef1da60f313235fd459b4ff3b48e6aafe30538ff21"}, ] [package.dependencies] From 691740df974cc584b890110784ff8b6ac733cfdc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 10 Jan 2025 17:07:00 -0500 Subject: [PATCH 23/32] Build: Bump sqlalchemy from 2.0.36 to 2.0.37 (#1502) Bumps [sqlalchemy](https://github.com/sqlalchemy/sqlalchemy) from 2.0.36 to 2.0.37. - [Release notes](https://github.com/sqlalchemy/sqlalchemy/releases) - [Changelog](https://github.com/sqlalchemy/sqlalchemy/blob/main/CHANGES.rst) - [Commits](https://github.com/sqlalchemy/sqlalchemy/commits) --- updated-dependencies: - dependency-name: sqlalchemy dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 118 ++++++++++++++++++++++++++-------------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/poetry.lock b/poetry.lock index 2c1ace347e..687ff5a3a8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4660,72 +4660,72 @@ test = ["pytest"] [[package]] name = "sqlalchemy" -version = "2.0.36" +version = "2.0.37" description = "Database Abstraction Library" optional = true python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.36-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:59b8f3adb3971929a3e660337f5dacc5942c2cdb760afcabb2614ffbda9f9f72"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37350015056a553e442ff672c2d20e6f4b6d0b2495691fa239d8aa18bb3bc908"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8318f4776c85abc3f40ab185e388bee7a6ea99e7fa3a30686580b209eaa35c08"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c245b1fbade9c35e5bd3b64270ab49ce990369018289ecfde3f9c318411aaa07"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:69f93723edbca7342624d09f6704e7126b152eaed3cdbb634cb657a54332a3c5"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f9511d8dd4a6e9271d07d150fb2f81874a3c8c95e11ff9af3a2dfc35fe42ee44"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-win32.whl", hash = "sha256:c3f3631693003d8e585d4200730616b78fafd5a01ef8b698f6967da5c605b3fa"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-win_amd64.whl", hash = "sha256:a86bfab2ef46d63300c0f06936bd6e6c0105faa11d509083ba8f2f9d237fb5b5"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fd3a55deef00f689ce931d4d1b23fa9f04c880a48ee97af488fd215cf24e2a6c"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4f5e9cd989b45b73bd359f693b935364f7e1f79486e29015813c338450aa5a71"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0ddd9db6e59c44875211bc4c7953a9f6638b937b0a88ae6d09eb46cced54eff"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2519f3a5d0517fc159afab1015e54bb81b4406c278749779be57a569d8d1bb0d"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59b1ee96617135f6e1d6f275bbe988f419c5178016f3d41d3c0abb0c819f75bb"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:39769a115f730d683b0eb7b694db9789267bcd027326cccc3125e862eb03bfd8"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-win32.whl", hash = "sha256:66bffbad8d6271bb1cc2f9a4ea4f86f80fe5e2e3e501a5ae2a3dc6a76e604e6f"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-win_amd64.whl", hash = "sha256:23623166bfefe1487d81b698c423f8678e80df8b54614c2bf4b4cfcd7c711959"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7b64e6ec3f02c35647be6b4851008b26cff592a95ecb13b6788a54ef80bbdd4"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:46331b00096a6db1fdc052d55b101dbbfc99155a548e20a0e4a8e5e4d1362855"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdf3386a801ea5aba17c6410dd1dc8d39cf454ca2565541b5ac42a84e1e28f53"}, - {file = 
"SQLAlchemy-2.0.36-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac9dfa18ff2a67b09b372d5db8743c27966abf0e5344c555d86cc7199f7ad83a"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:90812a8933df713fdf748b355527e3af257a11e415b613dd794512461eb8a686"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1bc330d9d29c7f06f003ab10e1eaced295e87940405afe1b110f2eb93a233588"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-win32.whl", hash = "sha256:79d2e78abc26d871875b419e1fd3c0bca31a1cb0043277d0d850014599626c2e"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-win_amd64.whl", hash = "sha256:b544ad1935a8541d177cb402948b94e871067656b3a0b9e91dbec136b06a2ff5"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b5cc79df7f4bc3d11e4b542596c03826063092611e481fcf1c9dfee3c94355ef"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3c01117dd36800f2ecaa238c65365b7b16497adc1522bf84906e5710ee9ba0e8"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9bc633f4ee4b4c46e7adcb3a9b5ec083bf1d9a97c1d3854b92749d935de40b9b"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e46ed38affdfc95d2c958de328d037d87801cfcbea6d421000859e9789e61c2"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b2985c0b06e989c043f1dc09d4fe89e1616aadd35392aea2844f0458a989eacf"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a121d62ebe7d26fec9155f83f8be5189ef1405f5973ea4874a26fab9f1e262c"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-win32.whl", hash = "sha256:0572f4bd6f94752167adfd7c1bed84f4b240ee6203a95e05d1e208d488d0d436"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-win_amd64.whl", hash = "sha256:8c78ac40bde930c60e0f78b3cd184c580f89456dd87fc08f9e3ee3ce8765ce88"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:be9812b766cad94a25bc63bec11f88c4ad3629a0cec1cd5d4ba48dc23860486b"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50aae840ebbd6cdd41af1c14590e5741665e5272d2fee999306673a1bb1fdb4d"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4557e1f11c5f653ebfdd924f3f9d5ebfc718283b0b9beebaa5dd6b77ec290971"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:07b441f7d03b9a66299ce7ccf3ef2900abc81c0db434f42a5694a37bd73870f2"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:28120ef39c92c2dd60f2721af9328479516844c6b550b077ca450c7d7dc68575"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-win32.whl", hash = "sha256:b81ee3d84803fd42d0b154cb6892ae57ea6b7c55d8359a02379965706c7efe6c"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-win_amd64.whl", hash = "sha256:f942a799516184c855e1a32fbc7b29d7e571b52612647866d4ec1c3242578fcb"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3d6718667da04294d7df1670d70eeddd414f313738d20a6f1d1f379e3139a545"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:72c28b84b174ce8af8504ca28ae9347d317f9dba3999e5981a3cd441f3712e24"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b11d0cfdd2b095e7b0686cf5fabeb9c67fae5b06d265d8180715b8cfa86522e3"}, - {file = 
"SQLAlchemy-2.0.36-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e32092c47011d113dc01ab3e1d3ce9f006a47223b18422c5c0d150af13a00687"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6a440293d802d3011028e14e4226da1434b373cbaf4a4bbb63f845761a708346"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c54a1e53a0c308a8e8a7dffb59097bff7facda27c70c286f005327f21b2bd6b1"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-win32.whl", hash = "sha256:1e0d612a17581b6616ff03c8e3d5eff7452f34655c901f75d62bd86449d9750e"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-win_amd64.whl", hash = "sha256:8958b10490125124463095bbdadda5aa22ec799f91958e410438ad6c97a7b793"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dc022184d3e5cacc9579e41805a681187650e170eb2fd70e28b86192a479dcaa"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b817d41d692bf286abc181f8af476c4fbef3fd05e798777492618378448ee689"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4e46a888b54be23d03a89be510f24a7652fe6ff660787b96cd0e57a4ebcb46d"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4ae3005ed83f5967f961fd091f2f8c5329161f69ce8480aa8168b2d7fe37f06"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:03e08af7a5f9386a43919eda9de33ffda16b44eb11f3b313e6822243770e9763"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3dbb986bad3ed5ceaf090200eba750b5245150bd97d3e67343a3cfed06feecf7"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-win32.whl", hash = "sha256:9fe53b404f24789b5ea9003fc25b9a3988feddebd7e7b369c8fac27ad6f52f28"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-win_amd64.whl", hash = "sha256:af148a33ff0349f53512a049c6406923e4e02bf2f26c5fb285f143faf4f0e46a"}, - {file = "SQLAlchemy-2.0.36-py3-none-any.whl", hash = "sha256:fddbe92b4760c6f5d48162aef14824add991aeda8ddadb3c31d56eb15ca69f8e"}, - {file = "sqlalchemy-2.0.36.tar.gz", hash = "sha256:7f2767680b6d2398aea7082e45a774b2b0767b5c8d8ffb9c8b683088ea9b29c5"}, + {file = "SQLAlchemy-2.0.37-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da36c3b0e891808a7542c5c89f224520b9a16c7f5e4d6a1156955605e54aef0e"}, + {file = "SQLAlchemy-2.0.37-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e7402ff96e2b073a98ef6d6142796426d705addd27b9d26c3b32dbaa06d7d069"}, + {file = "SQLAlchemy-2.0.37-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6f5d254a22394847245f411a2956976401e84da4288aa70cbcd5190744062c1"}, + {file = "SQLAlchemy-2.0.37-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41296bbcaa55ef5fdd32389a35c710133b097f7b2609d8218c0eabded43a1d84"}, + {file = "SQLAlchemy-2.0.37-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bedee60385c1c0411378cbd4dc486362f5ee88deceea50002772912d798bb00f"}, + {file = "SQLAlchemy-2.0.37-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6c67415258f9f3c69867ec02fea1bf6508153709ecbd731a982442a590f2b7e4"}, + {file = "SQLAlchemy-2.0.37-cp310-cp310-win32.whl", hash = "sha256:650dcb70739957a492ad8acff65d099a9586b9b8920e3507ca61ec3ce650bb72"}, + {file = "SQLAlchemy-2.0.37-cp310-cp310-win_amd64.whl", hash = "sha256:93d1543cd8359040c02b6614421c8e10cd7a788c40047dbc507ed46c29ae5636"}, + {file = "SQLAlchemy-2.0.37-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:78361be6dc9073ed17ab380985d1e45e48a642313ab68ab6afa2457354ff692c"}, + {file = "SQLAlchemy-2.0.37-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b661b49d0cb0ab311a189b31e25576b7ac3e20783beb1e1817d72d9d02508bf5"}, + {file = "SQLAlchemy-2.0.37-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d57bafbab289e147d064ffbd5cca2d7b1394b63417c0636cea1f2e93d16eb9e8"}, + {file = "SQLAlchemy-2.0.37-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fa2c0913f02341d25fb858e4fb2031e6b0813494cca1ba07d417674128ce11b"}, + {file = "SQLAlchemy-2.0.37-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9df21b8d9e5c136ea6cde1c50d2b1c29a2b5ff2b1d610165c23ff250e0704087"}, + {file = "SQLAlchemy-2.0.37-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db18ff6b8c0f1917f8b20f8eca35c28bbccb9f83afa94743e03d40203ed83de9"}, + {file = "SQLAlchemy-2.0.37-cp311-cp311-win32.whl", hash = "sha256:46954173612617a99a64aee103bcd3f078901b9a8dcfc6ae80cbf34ba23df989"}, + {file = "SQLAlchemy-2.0.37-cp311-cp311-win_amd64.whl", hash = "sha256:7b7e772dc4bc507fdec4ee20182f15bd60d2a84f1e087a8accf5b5b7a0dcf2ba"}, + {file = "SQLAlchemy-2.0.37-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2952748ecd67ed3b56773c185e85fc084f6bdcdec10e5032a7c25a6bc7d682ef"}, + {file = "SQLAlchemy-2.0.37-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3151822aa1db0eb5afd65ccfafebe0ef5cda3a7701a279c8d0bf17781a793bb4"}, + {file = "SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eaa8039b6d20137a4e02603aba37d12cd2dde7887500b8855356682fc33933f4"}, + {file = "SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1cdba1f73b64530c47b27118b7053b8447e6d6f3c8104e3ac59f3d40c33aa9fd"}, + {file = "SQLAlchemy-2.0.37-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1b2690456528a87234a75d1a1644cdb330a6926f455403c8e4f6cad6921f9098"}, + {file = "SQLAlchemy-2.0.37-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf5ae8a9dcf657fd72144a7fd01f243236ea39e7344e579a121c4205aedf07bb"}, + {file = "SQLAlchemy-2.0.37-cp312-cp312-win32.whl", hash = "sha256:ea308cec940905ba008291d93619d92edaf83232ec85fbd514dcb329f3192761"}, + {file = "SQLAlchemy-2.0.37-cp312-cp312-win_amd64.whl", hash = "sha256:635d8a21577341dfe4f7fa59ec394b346da12420b86624a69e466d446de16aff"}, + {file = "SQLAlchemy-2.0.37-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8c4096727193762e72ce9437e2a86a110cf081241919ce3fab8e89c02f6b6658"}, + {file = "SQLAlchemy-2.0.37-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e4fb5ac86d8fe8151966814f6720996430462e633d225497566b3996966b9bdb"}, + {file = "SQLAlchemy-2.0.37-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e56a139bfe136a22c438478a86f8204c1eb5eed36f4e15c4224e4b9db01cb3e4"}, + {file = "SQLAlchemy-2.0.37-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f95fc8e3f34b5f6b3effb49d10ac97c569ec8e32f985612d9b25dd12d0d2e94"}, + {file = "SQLAlchemy-2.0.37-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c505edd429abdfe3643fa3b2e83efb3445a34a9dc49d5f692dd087be966020e0"}, + {file = "SQLAlchemy-2.0.37-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:12b0f1ec623cccf058cf21cb544f0e74656618165b083d78145cafde156ea7b6"}, + {file = "SQLAlchemy-2.0.37-cp313-cp313-win32.whl", hash = "sha256:293f9ade06b2e68dd03cfb14d49202fac47b7bb94bffcff174568c951fbc7af2"}, + {file = "SQLAlchemy-2.0.37-cp313-cp313-win_amd64.whl", hash = 
"sha256:d70f53a0646cc418ca4853da57cf3ddddbccb8c98406791f24426f2dd77fd0e2"}, + {file = "SQLAlchemy-2.0.37-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:44f569d0b1eb82301b92b72085583277316e7367e038d97c3a1a899d9a05e342"}, + {file = "SQLAlchemy-2.0.37-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2eae3423e538c10d93ae3e87788c6a84658c3ed6db62e6a61bb9495b0ad16bb"}, + {file = "SQLAlchemy-2.0.37-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfff7be361048244c3aa0f60b5e63221c5e0f0e509f4e47b8910e22b57d10ae7"}, + {file = "SQLAlchemy-2.0.37-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:5bc3339db84c5fb9130ac0e2f20347ee77b5dd2596ba327ce0d399752f4fce39"}, + {file = "SQLAlchemy-2.0.37-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:84b9f23b0fa98a6a4b99d73989350a94e4a4ec476b9a7dfe9b79ba5939f5e80b"}, + {file = "SQLAlchemy-2.0.37-cp37-cp37m-win32.whl", hash = "sha256:51bc9cfef83e0ac84f86bf2b10eaccb27c5a3e66a1212bef676f5bee6ef33ebb"}, + {file = "SQLAlchemy-2.0.37-cp37-cp37m-win_amd64.whl", hash = "sha256:8e47f1af09444f87c67b4f1bb6231e12ba6d4d9f03050d7fc88df6d075231a49"}, + {file = "SQLAlchemy-2.0.37-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6b788f14c5bb91db7f468dcf76f8b64423660a05e57fe277d3f4fad7b9dcb7ce"}, + {file = "SQLAlchemy-2.0.37-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:521ef85c04c33009166777c77e76c8a676e2d8528dc83a57836b63ca9c69dcd1"}, + {file = "SQLAlchemy-2.0.37-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75311559f5c9881a9808eadbeb20ed8d8ba3f7225bef3afed2000c2a9f4d49b9"}, + {file = "SQLAlchemy-2.0.37-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cce918ada64c956b62ca2c2af59b125767097ec1dca89650a6221e887521bfd7"}, + {file = "SQLAlchemy-2.0.37-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9d087663b7e1feabea8c578d6887d59bb00388158e8bff3a76be11aa3f748ca2"}, + {file = "SQLAlchemy-2.0.37-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cf95a60b36997dad99692314c4713f141b61c5b0b4cc5c3426faad570b31ca01"}, + {file = "SQLAlchemy-2.0.37-cp38-cp38-win32.whl", hash = "sha256:d75ead7dd4d255068ea0f21492ee67937bd7c90964c8f3c2bea83c7b7f81b95f"}, + {file = "SQLAlchemy-2.0.37-cp38-cp38-win_amd64.whl", hash = "sha256:74bbd1d0a9bacf34266a7907d43260c8d65d31d691bb2356f41b17c2dca5b1d0"}, + {file = "SQLAlchemy-2.0.37-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:648ec5acf95ad59255452ef759054f2176849662af4521db6cb245263ae4aa33"}, + {file = "SQLAlchemy-2.0.37-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:35bd2df269de082065d4b23ae08502a47255832cc3f17619a5cea92ce478b02b"}, + {file = "SQLAlchemy-2.0.37-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f581d365af9373a738c49e0c51e8b18e08d8a6b1b15cc556773bcd8a192fa8b"}, + {file = "SQLAlchemy-2.0.37-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82df02816c14f8dc9f4d74aea4cb84a92f4b0620235daa76dde002409a3fbb5a"}, + {file = "SQLAlchemy-2.0.37-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:94b564e38b344d3e67d2e224f0aec6ba09a77e4582ced41e7bfd0f757d926ec9"}, + {file = "SQLAlchemy-2.0.37-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:955a2a765aa1bd81aafa69ffda179d4fe3e2a3ad462a736ae5b6f387f78bfeb8"}, + {file = "SQLAlchemy-2.0.37-cp39-cp39-win32.whl", hash = "sha256:03f0528c53ca0b67094c4764523c1451ea15959bbf0a8a8a3096900014db0278"}, + {file = "SQLAlchemy-2.0.37-cp39-cp39-win_amd64.whl", hash = 
"sha256:4b12885dc85a2ab2b7d00995bac6d967bffa8594123b02ed21e8eb2205a7584b"}, + {file = "SQLAlchemy-2.0.37-py3-none-any.whl", hash = "sha256:a8998bf9f8658bd3839cbc44ddbe982955641863da0c1efe5b00c1ab4f5c16b1"}, + {file = "sqlalchemy-2.0.37.tar.gz", hash = "sha256:12b28d99a9c14eaf4055810df1001557176716de0167b91026e648e65229bffb"}, ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +greenlet = {version = "!=0.4.17", markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} typing-extensions = ">=4.6.0" [package.extras] From c68b9b1eb0530c5df2a8b114f6df54b63a8374d8 Mon Sep 17 00:00:00 2001 From: smaheshwar-pltr Date: Fri, 10 Jan 2025 22:33:48 +0000 Subject: [PATCH 24/32] Support Location Providers (#1452) * Skeletal implementation * First attempt at hashing locations * Relocate to table submodule; code and comment improvements * Add unit tests * Remove entropy check * Nit: Prefer `self.table_properties` * Remove special character testing * Add integration tests for writes * Move all `LocationProviders`-related code into locations.py * Nit: tiny for loop refactor * Fix typo * Object storage as default location provider * Update tests/integration/test_writes/test_partitioned_writes.py Co-authored-by: Kevin Liu * Test entropy in test_object_storage_injects_entropy * Refactor integration tests to use properties and omit when default once * Use a different table property for custom location provision * write.location-provider.py-impl -> write.py-location-provider.impl * Make lint * Move location provider loading into `write_file` for back-compat * Make object storage no longer the default * Add test case for partitioned paths disabled but with no partition special case * Moved constants within ObjectStoreLocationProvider --------- Co-authored-by: Sreesh Maheshwar Co-authored-by: Kevin Liu --- pyiceberg/io/pyarrow.py | 7 +- pyiceberg/table/__init__.py | 15 +- pyiceberg/table/locations.py | 145 ++++++++++++++++++ .../test_writes/test_partitioned_writes.py | 39 +++++ tests/integration/test_writes/test_writes.py | 27 ++++ tests/table/test_locations.py | 130 ++++++++++++++++ 6 files changed, 355 insertions(+), 8 deletions(-) create mode 100644 pyiceberg/table/locations.py create mode 100644 tests/table/test_locations.py diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index ad7e4f4f85..1ce0842844 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -136,6 +136,7 @@ visit, visit_with_partner, ) +from pyiceberg.table.locations import load_location_provider from pyiceberg.table.metadata import TableMetadata from pyiceberg.table.name_mapping import NameMapping, apply_name_mapping from pyiceberg.transforms import TruncateTransform @@ -2305,6 +2306,7 @@ def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteT property_name=TableProperties.PARQUET_ROW_GROUP_LIMIT, default=TableProperties.PARQUET_ROW_GROUP_LIMIT_DEFAULT, ) + location_provider = load_location_provider(table_location=table_metadata.location, table_properties=table_metadata.properties) def 
write_parquet(task: WriteTask) -> DataFile: table_schema = table_metadata.schema() @@ -2327,7 +2329,10 @@ def write_parquet(task: WriteTask) -> DataFile: for batch in task.record_batches ] arrow_table = pa.Table.from_batches(batches) - file_path = f"{table_metadata.location}/data/{task.generate_data_file_path('parquet')}" + file_path = location_provider.new_data_location( + data_file_name=task.generate_data_file_filename("parquet"), + partition_key=task.partition_key, + ) fo = io.new_output(file_path) with fo.create(overwrite=True) as fos: with pq.ParquetWriter(fos, schema=arrow_table.schema, **parquet_writer_kwargs) as writer: diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 7bc3fe838b..0c8c848c43 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -187,6 +187,14 @@ class TableProperties: WRITE_PARTITION_SUMMARY_LIMIT = "write.summary.partition-limit" WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT = 0 + WRITE_PY_LOCATION_PROVIDER_IMPL = "write.py-location-provider.impl" + + OBJECT_STORE_ENABLED = "write.object-storage.enabled" + OBJECT_STORE_ENABLED_DEFAULT = False + + WRITE_OBJECT_STORE_PARTITIONED_PATHS = "write.object-storage.partitioned-paths" + WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT = True + DELETE_MODE = "write.delete.mode" DELETE_MODE_COPY_ON_WRITE = "copy-on-write" DELETE_MODE_MERGE_ON_READ = "merge-on-read" @@ -1613,13 +1621,6 @@ def generate_data_file_filename(self, extension: str) -> str: # https://github.com/apache/iceberg/blob/a582968975dd30ff4917fbbe999f1be903efac02/core/src/main/java/org/apache/iceberg/io/OutputFileFactory.java#L92-L101 return f"00000-{self.task_id}-{self.write_uuid}.{extension}" - def generate_data_file_path(self, extension: str) -> str: - if self.partition_key: - file_path = f"{self.partition_key.to_path()}/{self.generate_data_file_filename(extension)}" - return file_path - else: - return self.generate_data_file_filename(extension) - @dataclass(frozen=True) class AddFileTask: diff --git a/pyiceberg/table/locations.py b/pyiceberg/table/locations.py new file mode 100644 index 0000000000..046ee32527 --- /dev/null +++ b/pyiceberg/table/locations.py @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import importlib +import logging +from abc import ABC, abstractmethod +from typing import Optional + +import mmh3 + +from pyiceberg.partitioning import PartitionKey +from pyiceberg.table import TableProperties +from pyiceberg.typedef import Properties +from pyiceberg.utils.properties import property_as_bool + +logger = logging.getLogger(__name__) + + +class LocationProvider(ABC): + """A base class for location providers, that provide data file locations for write tasks.""" + + table_location: str + table_properties: Properties + + def __init__(self, table_location: str, table_properties: Properties): + self.table_location = table_location + self.table_properties = table_properties + + @abstractmethod + def new_data_location(self, data_file_name: str, partition_key: Optional[PartitionKey] = None) -> str: + """Return a fully-qualified data file location for the given filename. + + Args: + data_file_name (str): The name of the data file. + partition_key (Optional[PartitionKey]): The data file's partition key. If None, the data is not partitioned. + + Returns: + str: A fully-qualified location URI for the data file. + """ + + +class SimpleLocationProvider(LocationProvider): + def __init__(self, table_location: str, table_properties: Properties): + super().__init__(table_location, table_properties) + + def new_data_location(self, data_file_name: str, partition_key: Optional[PartitionKey] = None) -> str: + prefix = f"{self.table_location}/data" + return f"{prefix}/{partition_key.to_path()}/{data_file_name}" if partition_key else f"{prefix}/{data_file_name}" + + +class ObjectStoreLocationProvider(LocationProvider): + HASH_BINARY_STRING_BITS = 20 + ENTROPY_DIR_LENGTH = 4 + ENTROPY_DIR_DEPTH = 3 + + _include_partition_paths: bool + + def __init__(self, table_location: str, table_properties: Properties): + super().__init__(table_location, table_properties) + self._include_partition_paths = property_as_bool( + self.table_properties, + TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS, + TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT, + ) + + def new_data_location(self, data_file_name: str, partition_key: Optional[PartitionKey] = None) -> str: + if self._include_partition_paths and partition_key: + return self.new_data_location(f"{partition_key.to_path()}/{data_file_name}") + + prefix = f"{self.table_location}/data" + hashed_path = self._compute_hash(data_file_name) + + return ( + f"{prefix}/{hashed_path}/{data_file_name}" + if self._include_partition_paths + else f"{prefix}/{hashed_path}-{data_file_name}" + ) + + @staticmethod + def _compute_hash(data_file_name: str) -> str: + # Bitwise AND to combat sign-extension; bitwise OR to preserve leading zeroes that `bin` would otherwise strip. 
+ top_mask = 1 << ObjectStoreLocationProvider.HASH_BINARY_STRING_BITS + hash_code = mmh3.hash(data_file_name) & (top_mask - 1) | top_mask + return ObjectStoreLocationProvider._dirs_from_hash(bin(hash_code)[-ObjectStoreLocationProvider.HASH_BINARY_STRING_BITS :]) + + @staticmethod + def _dirs_from_hash(file_hash: str) -> str: + """Divides hash into directories for optimized orphan removal operation using ENTROPY_DIR_DEPTH and ENTROPY_DIR_LENGTH.""" + total_entropy_length = ObjectStoreLocationProvider.ENTROPY_DIR_DEPTH * ObjectStoreLocationProvider.ENTROPY_DIR_LENGTH + + hash_with_dirs = [] + for i in range(0, total_entropy_length, ObjectStoreLocationProvider.ENTROPY_DIR_LENGTH): + hash_with_dirs.append(file_hash[i : i + ObjectStoreLocationProvider.ENTROPY_DIR_LENGTH]) + + if len(file_hash) > total_entropy_length: + hash_with_dirs.append(file_hash[total_entropy_length:]) + + return "/".join(hash_with_dirs) + + +def _import_location_provider( + location_provider_impl: str, table_location: str, table_properties: Properties +) -> Optional[LocationProvider]: + try: + path_parts = location_provider_impl.split(".") + if len(path_parts) < 2: + raise ValueError( + f"{TableProperties.WRITE_PY_LOCATION_PROVIDER_IMPL} should be full path (module.CustomLocationProvider), got: {location_provider_impl}" + ) + module_name, class_name = ".".join(path_parts[:-1]), path_parts[-1] + module = importlib.import_module(module_name) + class_ = getattr(module, class_name) + return class_(table_location, table_properties) + except ModuleNotFoundError: + logger.warning("Could not initialize LocationProvider: %s", location_provider_impl) + return None + + +def load_location_provider(table_location: str, table_properties: Properties) -> LocationProvider: + table_location = table_location.rstrip("/") + + if location_provider_impl := table_properties.get(TableProperties.WRITE_PY_LOCATION_PROVIDER_IMPL): + if location_provider := _import_location_provider(location_provider_impl, table_location, table_properties): + logger.info("Loaded LocationProvider: %s", location_provider_impl) + return location_provider + else: + raise ValueError(f"Could not initialize LocationProvider: {location_provider_impl}") + + if property_as_bool(table_properties, TableProperties.OBJECT_STORE_ENABLED, TableProperties.OBJECT_STORE_ENABLED_DEFAULT): + return ObjectStoreLocationProvider(table_location, table_properties) + else: + return SimpleLocationProvider(table_location, table_properties) diff --git a/tests/integration/test_writes/test_partitioned_writes.py b/tests/integration/test_writes/test_partitioned_writes.py index 8a3a5c9acc..50a1bc8c38 100644 --- a/tests/integration/test_writes/test_partitioned_writes.py +++ b/tests/integration/test_writes/test_partitioned_writes.py @@ -28,6 +28,7 @@ from pyiceberg.exceptions import NoSuchTableError from pyiceberg.partitioning import PartitionField, PartitionSpec from pyiceberg.schema import Schema +from pyiceberg.table import TableProperties from pyiceberg.transforms import ( BucketTransform, DayTransform, @@ -280,6 +281,44 @@ def test_query_filter_v1_v2_append_null( assert df.where(f"{col} is null").count() == 2, f"Expected 2 null rows for {col}" +@pytest.mark.integration +@pytest.mark.parametrize( + "part_col", ["int", "bool", "string", "string_long", "long", "float", "double", "date", "timestamp", "timestamptz", "binary"] +) +@pytest.mark.parametrize("format_version", [1, 2]) +def test_object_storage_location_provider_excludes_partition_path( + session_catalog: Catalog, spark: SparkSession, 
arrow_table_with_null: pa.Table, part_col: str, format_version: int +) -> None: + nested_field = TABLE_SCHEMA.find_field(part_col) + partition_spec = PartitionSpec( + PartitionField(source_id=nested_field.field_id, field_id=1001, transform=IdentityTransform(), name=part_col) + ) + + tbl = _create_table( + session_catalog=session_catalog, + identifier=f"default.arrow_table_v{format_version}_with_null_partitioned_on_col_{part_col}", + # write.object-storage.partitioned-paths defaults to True + properties={"format-version": str(format_version), TableProperties.OBJECT_STORE_ENABLED: True}, + data=[arrow_table_with_null], + partition_spec=partition_spec, + ) + + original_paths = tbl.inspect.data_files().to_pydict()["file_path"] + assert len(original_paths) == 3 + + # Update props to exclude partitioned paths and append data + with tbl.transaction() as tx: + tx.set_properties({TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS: False}) + tbl.append(arrow_table_with_null) + + added_paths = set(tbl.inspect.data_files().to_pydict()["file_path"]) - set(original_paths) + assert len(added_paths) == 3 + + # All paths before the props update should contain the partition, while all paths after should not + assert all(f"{part_col}=" in path for path in original_paths) + assert all(f"{part_col}=" not in path for path in added_paths) + + @pytest.mark.integration @pytest.mark.parametrize( "spec", diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index c23e836554..fff48b9373 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -285,6 +285,33 @@ def test_data_files(spark: SparkSession, session_catalog: Catalog, arrow_table_w assert [row.deleted_data_files_count for row in rows] == [0, 1, 0, 0, 0] +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_object_storage_data_files( + spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int +) -> None: + tbl = _create_table( + session_catalog=session_catalog, + identifier="default.object_stored", + properties={"format-version": format_version, TableProperties.OBJECT_STORE_ENABLED: True}, + data=[arrow_table_with_null], + ) + tbl.append(arrow_table_with_null) + + paths = tbl.inspect.data_files().to_pydict()["file_path"] + assert len(paths) == 2 + + for location in paths: + assert location.startswith("s3://warehouse/default/object_stored/data/") + parts = location.split("/") + assert len(parts) == 11 + + # Entropy binary directories should have been injected + for dir_name in parts[6:10]: + assert dir_name + assert all(c in "01" for c in dir_name) + + @pytest.mark.integration def test_python_writes_with_spark_snapshot_reads( spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table diff --git a/tests/table/test_locations.py b/tests/table/test_locations.py new file mode 100644 index 0000000000..bda2442aca --- /dev/null +++ b/tests/table/test_locations.py @@ -0,0 +1,130 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from typing import Optional + +import pytest + +from pyiceberg.partitioning import PartitionField, PartitionFieldValue, PartitionKey, PartitionSpec +from pyiceberg.schema import Schema +from pyiceberg.table.locations import LocationProvider, load_location_provider +from pyiceberg.transforms import IdentityTransform +from pyiceberg.typedef import EMPTY_DICT +from pyiceberg.types import NestedField, StringType + +PARTITION_FIELD = PartitionField(source_id=1, field_id=1002, transform=IdentityTransform(), name="string_field") +PARTITION_KEY = PartitionKey( + raw_partition_field_values=[PartitionFieldValue(PARTITION_FIELD, "example_string")], + partition_spec=PartitionSpec(PARTITION_FIELD), + schema=Schema(NestedField(field_id=1, name="string_field", field_type=StringType(), required=False)), +) + + +class CustomLocationProvider(LocationProvider): + def new_data_location(self, data_file_name: str, partition_key: Optional[PartitionKey] = None) -> str: + return f"custom_location_provider/{data_file_name}" + + +def test_default_location_provider() -> None: + provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT) + + assert provider.new_data_location("my_file") == "table_location/data/my_file" + + +def test_custom_location_provider() -> None: + qualified_name = CustomLocationProvider.__module__ + "." + CustomLocationProvider.__name__ + provider = load_location_provider( + table_location="table_location", table_properties={"write.py-location-provider.impl": qualified_name} + ) + + assert provider.new_data_location("my_file") == "custom_location_provider/my_file" + + +def test_custom_location_provider_single_path() -> None: + with pytest.raises(ValueError, match=r"write\.py-location-provider\.impl should be full path"): + load_location_provider(table_location="table_location", table_properties={"write.py-location-provider.impl": "not_found"}) + + +def test_custom_location_provider_not_found() -> None: + with pytest.raises(ValueError, match=r"Could not initialize LocationProvider"): + load_location_provider( + table_location="table_location", table_properties={"write.py-location-provider.impl": "module.not_found"} + ) + + +def test_object_storage_injects_entropy() -> None: + provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"}) + + location = provider.new_data_location("test.parquet") + parts = location.split("/") + + assert len(parts) == 7 + assert parts[0] == "table_location" + assert parts[1] == "data" + assert parts[-1] == "test.parquet" + + # Entropy directories in the middle + for dir_name in parts[2:-1]: + assert dir_name + assert all(c in "01" for c in dir_name) + + +@pytest.mark.parametrize("object_storage", [True, False]) +def test_partition_value_in_path(object_storage: bool) -> None: + provider = load_location_provider( + table_location="table_location", + table_properties={ + "write.object-storage.enabled": str(object_storage), + }, + ) + + location = provider.new_data_location("test.parquet", PARTITION_KEY) + partition_segment = location.split("/")[-2] + + assert 
partition_segment == "string_field=example_string" + + +# NB: We test here with None partition key too because disabling partitioned paths still replaces final / with - even in +# paths of un-partitioned files. This matches the behaviour of the Java implementation. +@pytest.mark.parametrize("partition_key", [PARTITION_KEY, None]) +def test_object_storage_partitioned_paths_disabled(partition_key: Optional[PartitionKey]) -> None: + provider = load_location_provider( + table_location="table_location", + table_properties={ + "write.object-storage.enabled": "true", + "write.object-storage.partitioned-paths": "false", + }, + ) + + location = provider.new_data_location("test.parquet", partition_key) + + # No partition values included in the path and last part of entropy is separated with "-" + assert location == "table_location/data/0110/1010/0011/11101000-test.parquet" + + +@pytest.mark.parametrize( + ["data_file_name", "expected_hash"], + [ + ("a", "0101/0110/1001/10110010"), + ("b", "1110/0111/1110/00000011"), + ("c", "0010/1101/0110/01011111"), + ("d", "1001/0001/0100/01110011"), + ], +) +def test_hash_injection(data_file_name: str, expected_hash: str) -> None: + provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"}) + + assert provider.new_data_location(data_file_name) == f"table_location/data/{expected_hash}/{data_file_name}" From cad0ad7d9358315abe1315de2a64227d91acceaa Mon Sep 17 00:00:00 2001 From: Soumya Ghosh Date: Sat, 11 Jan 2025 06:41:46 +0530 Subject: [PATCH 25/32] Add `all_manifests` metadata table with tests (#1241) * Add `all_manifests` metadata table with tests * Move get_manifests_schema and get_all_manifests_schema to InspectTable class * Update tests for all_manifests table * Added linter changes in inspect.py --- pyiceberg/table/inspect.py | 75 +++++++++++++------- tests/integration/test_inspect_table.py | 92 +++++++++++++++++++++++++ 2 files changed, 143 insertions(+), 24 deletions(-) diff --git a/pyiceberg/table/inspect.py b/pyiceberg/table/inspect.py index 71d38a2279..6dfa78a7ac 100644 --- a/pyiceberg/table/inspect.py +++ b/pyiceberg/table/inspect.py @@ -17,13 +17,14 @@ from __future__ import annotations from datetime import datetime, timezone -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Set, Tuple from pyiceberg.conversions import from_bytes from pyiceberg.manifest import DataFile, DataFileContent, ManifestContent, PartitionFieldSummary from pyiceberg.partitioning import PartitionSpec from pyiceberg.table.snapshots import Snapshot, ancestors_of from pyiceberg.types import PrimitiveType +from pyiceberg.utils.concurrent import ExecutorFactory from pyiceberg.utils.singleton import _convert_to_hashable_type if TYPE_CHECKING: @@ -346,7 +347,7 @@ def update_partitions_map( schema=table_schema, ) - def manifests(self) -> "pa.Table": + def _get_manifests_schema(self) -> "pa.Schema": import pyarrow as pa partition_summary_schema = pa.struct( @@ -374,6 +375,17 @@ def manifests(self) -> "pa.Table": pa.field("partition_summaries", pa.list_(partition_summary_schema), nullable=False), ] ) + return manifest_schema + + def _get_all_manifests_schema(self) -> "pa.Schema": + import pyarrow as pa + + all_manifests_schema = self._get_manifests_schema() + all_manifests_schema = all_manifests_schema.append(pa.field("reference_snapshot_id", pa.int64(), nullable=False)) + return all_manifests_schema + + def 
_generate_manifests_table(self, snapshot: Optional[Snapshot], is_all_manifests_table: bool = False) -> "pa.Table": + import pyarrow as pa def _partition_summaries_to_rows( spec: PartitionSpec, partition_summaries: List[PartitionFieldSummary] @@ -412,36 +424,38 @@ def _partition_summaries_to_rows( specs = self.tbl.metadata.specs() manifests = [] - if snapshot := self.tbl.metadata.current_snapshot(): + if snapshot: for manifest in snapshot.manifests(self.tbl.io): is_data_file = manifest.content == ManifestContent.DATA is_delete_file = manifest.content == ManifestContent.DELETES - manifests.append( - { - "content": manifest.content, - "path": manifest.manifest_path, - "length": manifest.manifest_length, - "partition_spec_id": manifest.partition_spec_id, - "added_snapshot_id": manifest.added_snapshot_id, - "added_data_files_count": manifest.added_files_count if is_data_file else 0, - "existing_data_files_count": manifest.existing_files_count if is_data_file else 0, - "deleted_data_files_count": manifest.deleted_files_count if is_data_file else 0, - "added_delete_files_count": manifest.added_files_count if is_delete_file else 0, - "existing_delete_files_count": manifest.existing_files_count if is_delete_file else 0, - "deleted_delete_files_count": manifest.deleted_files_count if is_delete_file else 0, - "partition_summaries": _partition_summaries_to_rows( - specs[manifest.partition_spec_id], manifest.partitions - ) - if manifest.partitions - else [], - } - ) + manifest_row = { + "content": manifest.content, + "path": manifest.manifest_path, + "length": manifest.manifest_length, + "partition_spec_id": manifest.partition_spec_id, + "added_snapshot_id": manifest.added_snapshot_id, + "added_data_files_count": manifest.added_files_count if is_data_file else 0, + "existing_data_files_count": manifest.existing_files_count if is_data_file else 0, + "deleted_data_files_count": manifest.deleted_files_count if is_data_file else 0, + "added_delete_files_count": manifest.added_files_count if is_delete_file else 0, + "existing_delete_files_count": manifest.existing_files_count if is_delete_file else 0, + "deleted_delete_files_count": manifest.deleted_files_count if is_delete_file else 0, + "partition_summaries": _partition_summaries_to_rows(specs[manifest.partition_spec_id], manifest.partitions) + if manifest.partitions + else [], + } + if is_all_manifests_table: + manifest_row["reference_snapshot_id"] = snapshot.snapshot_id + manifests.append(manifest_row) return pa.Table.from_pylist( manifests, - schema=manifest_schema, + schema=self._get_all_manifests_schema() if is_all_manifests_table else self._get_manifests_schema(), ) + def manifests(self) -> "pa.Table": + return self._generate_manifests_table(self.tbl.current_snapshot()) + def metadata_log_entries(self) -> "pa.Table": import pyarrow as pa @@ -630,3 +644,16 @@ def data_files(self, snapshot_id: Optional[int] = None) -> "pa.Table": def delete_files(self, snapshot_id: Optional[int] = None) -> "pa.Table": return self._files(snapshot_id, {DataFileContent.POSITION_DELETES, DataFileContent.EQUALITY_DELETES}) + + def all_manifests(self) -> "pa.Table": + import pyarrow as pa + + snapshots = self.tbl.snapshots() + if not snapshots: + return pa.Table.from_pylist([], schema=self._get_all_manifests_schema()) + + executor = ExecutorFactory.get_or_create() + manifests_by_snapshots: Iterator["pa.Table"] = executor.map( + lambda args: self._generate_manifests_table(*args), [(snapshot, True) for snapshot in snapshots] + ) + return 
pa.concat_tables(manifests_by_snapshots) diff --git a/tests/integration/test_inspect_table.py b/tests/integration/test_inspect_table.py index 68b10f3262..75fe92a69a 100644 --- a/tests/integration/test_inspect_table.py +++ b/tests/integration/test_inspect_table.py @@ -846,3 +846,95 @@ def inspect_files_asserts(df: pa.Table) -> None: inspect_files_asserts(files_df) inspect_files_asserts(data_files_df) inspect_files_asserts(delete_files_df) + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_inspect_all_manifests(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None: + from pandas.testing import assert_frame_equal + + identifier = "default.table_metadata_all_manifests" + try: + session_catalog.drop_table(identifier=identifier) + except NoSuchTableError: + pass + + spark.sql( + f""" + CREATE TABLE {identifier} ( + id int, + data string + ) + PARTITIONED BY (data) + TBLPROPERTIES ('write.update.mode'='merge-on-read', + 'write.delete.mode'='merge-on-read') + """ + ) + tbl = session_catalog.load_table(identifier) + + # check all_manifests when there are no snapshots + lhs = tbl.inspect.all_manifests().to_pandas() + rhs = spark.table(f"{identifier}.all_manifests").toPandas() + assert_frame_equal(lhs, rhs, check_dtype=False) + + spark.sql(f"INSERT INTO {identifier} VALUES (1, 'a')") + + spark.sql(f"INSERT INTO {identifier} VALUES (2, 'b')") + + spark.sql(f"UPDATE {identifier} SET data = 'c' WHERE id = 1") + + spark.sql(f"DELETE FROM {identifier} WHERE id = 2") + + spark.sql(f"INSERT OVERWRITE {identifier} VALUES (1, 'a')") + + tbl.refresh() + df = tbl.inspect.all_manifests() + + assert df.column_names == [ + "content", + "path", + "length", + "partition_spec_id", + "added_snapshot_id", + "added_data_files_count", + "existing_data_files_count", + "deleted_data_files_count", + "added_delete_files_count", + "existing_delete_files_count", + "deleted_delete_files_count", + "partition_summaries", + "reference_snapshot_id", + ] + + int_cols = [ + "content", + "length", + "partition_spec_id", + "added_snapshot_id", + "added_data_files_count", + "existing_data_files_count", + "deleted_data_files_count", + "added_delete_files_count", + "existing_delete_files_count", + "deleted_delete_files_count", + "reference_snapshot_id", + ] + + for column in int_cols: + for value in df[column]: + assert isinstance(value.as_py(), int) + + for value in df["path"]: + assert isinstance(value.as_py(), str) + + for value in df["partition_summaries"]: + assert isinstance(value.as_py(), list) + for row in value: + assert isinstance(row["contains_null"].as_py(), bool) + assert isinstance(row["contains_nan"].as_py(), (bool, type(None))) + assert isinstance(row["lower_bound"].as_py(), (str, type(None))) + assert isinstance(row["upper_bound"].as_py(), (str, type(None))) + + lhs = spark.table(f"{identifier}.all_manifests").toPandas() + rhs = df.to_pandas() + assert_frame_equal(lhs, rhs, check_dtype=False) From aface466f3393c8999bb5e2d90d9ff628044010c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 11 Jan 2025 14:13:05 -0500 Subject: [PATCH 26/32] Build: Bump deptry from 0.21.2 to 0.22.0 (#1508) Bumps [deptry](https://github.com/fpgmaas/deptry) from 0.21.2 to 0.22.0. 
- [Release notes](https://github.com/fpgmaas/deptry/releases) - [Changelog](https://github.com/fpgmaas/deptry/blob/main/CHANGELOG.md) - [Commits](https://github.com/fpgmaas/deptry/compare/0.21.2...0.22.0) --- updated-dependencies: - dependency-name: deptry dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 36 ++++++++++++++++++------------------ pyproject.toml | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/poetry.lock b/poetry.lock index 687ff5a3a8..58e36274bf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1063,27 +1063,27 @@ files = [ [[package]] name = "deptry" -version = "0.21.2" +version = "0.22.0" description = "A command line utility to check for unused, missing and transitive dependencies in a Python project." optional = false python-versions = ">=3.9" files = [ - {file = "deptry-0.21.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e3b9e0c5ee437240b65e61107b5777a12064f78f604bf9f181a96c9b56eb896d"}, - {file = "deptry-0.21.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:d76bbf48bd62ecc44ca3d414769bd4b7956598d23d9ccb42fd359b831a31cab2"}, - {file = "deptry-0.21.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3080bb88c16ebd35f59cba7688416115b7aaf4630dc5a051dff2649cbf129a1b"}, - {file = "deptry-0.21.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adb12d6678fb5dbd320a0a2e37881059d0a45bec6329df4250c977d803fe7f96"}, - {file = "deptry-0.21.2-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:7479d3079be69c3bbf5913d8e21090749c1139ee91f81520ffce90b5322476b0"}, - {file = "deptry-0.21.2-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:019167b35301edd2bdd4719c8b8f44769be4507cb8a1cd46fff4393cdbe8d31b"}, - {file = "deptry-0.21.2-cp39-abi3-win_amd64.whl", hash = "sha256:d8add495f0dd19a38aa6d1e09b14b1441bca47c9d945bc7b322efb084313eea3"}, - {file = "deptry-0.21.2-cp39-abi3-win_arm64.whl", hash = "sha256:06d48e9fa460aad02f9e1b079d9f5a69d622d291b3a0525b722fc91c88032042"}, - {file = "deptry-0.21.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3ef8aed33a2eac357f9565063bc1257bcefa03a37038299c08a4222e28f3cd34"}, - {file = "deptry-0.21.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:917745db5f8295eb5048e43d9073a9a675ffdba865e9b294d2e7aa455730cb06"}, - {file = "deptry-0.21.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:186ddbc69c1f70e684e83e202795e1054d0c2dfc03b8acc077f65dc3b6a7f4ce"}, - {file = "deptry-0.21.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3686e86ad7063b5a6e5253454f9d9e4a7a6b1511a99bd4306fda5424480be48"}, - {file = "deptry-0.21.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:1012a88500f242489066f811f6ec0c93328d9340bbf0f87f0c7d2146054d197e"}, - {file = "deptry-0.21.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:769bb658172586d1b03046bdc6b6c94f6a98ecfbac04ff7f77ec61768c75e1c2"}, - {file = "deptry-0.21.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fb2f43747b58abeec01dc277ef22859342f3bca2ac677818c94940a009b436c0"}, - {file = "deptry-0.21.2.tar.gz", hash = "sha256:4e870553c7a1fafcd99a83ba4137259525679eecabeff61bc669741efa201541"}, + {file = "deptry-0.22.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:2b903c94162e30640bb7a3e6800c7afd03a6bb12b693a21290e06c713dba35af"}, + {file = 
"deptry-0.22.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:8b523a33bed952679c97a9f55c690803f0fbeb32649946dcc1362c3f015897c7"}, + {file = "deptry-0.22.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c68fa570be1443888d252c6f551356777e56e82e492e68e6db3d65b31100c450"}, + {file = "deptry-0.22.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:016f8a5b6c32762beea47a4d9d2d7b04f1b6e534448e5444c7a742bd2fdb260d"}, + {file = "deptry-0.22.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:46c868a0493556b41096f9824a15a3ce38811e6b4a2699ebec16e06e9f85cd84"}, + {file = "deptry-0.22.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:aebba0d1ca119f6241ff0d5b72e72a9b912fa880e81f4ab346a32d9001d6ddb1"}, + {file = "deptry-0.22.0-cp39-abi3-win_amd64.whl", hash = "sha256:2da497a9888f930b5c86c6524b29a4d284ed320edd4148ecc2e45e10f177f4fe"}, + {file = "deptry-0.22.0-cp39-abi3-win_arm64.whl", hash = "sha256:35acf2ac783ba2ec43ba593ba14e0080393c0ab24797ba55fbed30f0ba02259f"}, + {file = "deptry-0.22.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9db9d0b8244e2b20bd75a21312c35ee628a602b00c0e2f267fb90f4600de6d2d"}, + {file = "deptry-0.22.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:edd0060065325cd70e6ce47feaa724cdb7fc3f4de673e4ed0fa38e8c1adc4155"}, + {file = "deptry-0.22.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b371a3c3194c2db9196ab1f80d5ce08138dea731eff8dd9fb2997da42941fa7"}, + {file = "deptry-0.22.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e20a8ba89078d06440316dba719c2278fdb19923e76633b808fd1b5670020c4"}, + {file = "deptry-0.22.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f4872f48225d1e7dbacb1be5e427945c8f76abf6b91453e038aae076b638ba01"}, + {file = "deptry-0.22.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:9a12ebe86299e7bb054804464467f33c49e5a34f204b710fa10fbe1f31c56964"}, + {file = "deptry-0.22.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fbe6211b972337acdeec6c11a82b666597c1edd6c6e2a93eb705bf49644bfb08"}, + {file = "deptry-0.22.0.tar.gz", hash = "sha256:32212cd40562f71b24da69babaed9a4233c567da390f681d86bb66f8ec4d2bfe"}, ] [package.dependencies] @@ -5357,4 +5357,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.9, !=3.9.7" -content-hash = "59e5678cd718f658c5bd099c03051564ee60f991e5f222bf92da13d1dd025a42" +content-hash = "6879624132285053b73c134d72db38b6dace947c67788387a2042d6c78569970" diff --git a/pyproject.toml b/pyproject.toml index 56be937305..db84bd27f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,7 +93,7 @@ typing-extensions = "4.12.2" pytest-mock = "3.14.0" pyspark = "3.5.3" cython = "3.0.11" -deptry = ">=0.14,<0.22" +deptry = ">=0.14,<0.23" docutils = "!=0.21.post1" # https://github.com/python-poetry/poetry/issues/9248#issuecomment-2026240520 [tool.poetry.group.docs.dependencies] From c409678ffb81e22f23fbed1561373a2b8e47cc86 Mon Sep 17 00:00:00 2001 From: smaheshwar-pltr Date: Mon, 13 Jan 2025 14:52:54 +0000 Subject: [PATCH 27/32] Use `ObjectStoreLocationProvider` by default (#1509) * Make object storage the default location provider * Nit: Remove comment beside property to prefer docs - Removed table proper * Nit: Add asserts for table properties defaults as well as comment in test --------- Co-authored-by: Sreesh Maheshwar --- pyiceberg/table/__init__.py | 2 +- tests/integration/test_writes/test_partitioned_writes.py | 6 ++++-- 
tests/table/test_locations.py | 7 +++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 0c8c848c43..f2df84d7ee 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -190,7 +190,7 @@ class TableProperties: WRITE_PY_LOCATION_PROVIDER_IMPL = "write.py-location-provider.impl" OBJECT_STORE_ENABLED = "write.object-storage.enabled" - OBJECT_STORE_ENABLED_DEFAULT = False + OBJECT_STORE_ENABLED_DEFAULT = True WRITE_OBJECT_STORE_PARTITIONED_PATHS = "write.object-storage.partitioned-paths" WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT = True diff --git a/tests/integration/test_writes/test_partitioned_writes.py b/tests/integration/test_writes/test_partitioned_writes.py index 50a1bc8c38..9e7632852c 100644 --- a/tests/integration/test_writes/test_partitioned_writes.py +++ b/tests/integration/test_writes/test_partitioned_writes.py @@ -294,11 +294,13 @@ def test_object_storage_location_provider_excludes_partition_path( PartitionField(source_id=nested_field.field_id, field_id=1001, transform=IdentityTransform(), name=part_col) ) + # write.object-storage.enabled and write.object-storage.partitioned-paths don't need to be specified as they're on by default + assert TableProperties.OBJECT_STORE_ENABLED_DEFAULT + assert TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT tbl = _create_table( session_catalog=session_catalog, identifier=f"default.arrow_table_v{format_version}_with_null_partitioned_on_col_{part_col}", - # write.object-storage.partitioned-paths defaults to True - properties={"format-version": str(format_version), TableProperties.OBJECT_STORE_ENABLED: True}, + properties={"format-version": str(format_version)}, data=[arrow_table_with_null], partition_spec=partition_spec, ) diff --git a/tests/table/test_locations.py b/tests/table/test_locations.py index bda2442aca..6753fe5a26 100644 --- a/tests/table/test_locations.py +++ b/tests/table/test_locations.py @@ -39,7 +39,7 @@ def new_data_location(self, data_file_name: str, partition_key: Optional[Partiti def test_default_location_provider() -> None: - provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT) + provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"}) assert provider.new_data_location("my_file") == "table_location/data/my_file" @@ -66,7 +66,7 @@ def test_custom_location_provider_not_found() -> None: def test_object_storage_injects_entropy() -> None: - provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"}) + provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT) location = provider.new_data_location("test.parquet") parts = location.split("/") @@ -104,7 +104,6 @@ def test_object_storage_partitioned_paths_disabled(partition_key: Optional[Parti provider = load_location_provider( table_location="table_location", table_properties={ - "write.object-storage.enabled": "true", "write.object-storage.partitioned-paths": "false", }, ) @@ -125,6 +124,6 @@ def test_object_storage_partitioned_paths_disabled(partition_key: Optional[Parti ], ) def test_hash_injection(data_file_name: str, expected_hash: str) -> None: - provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"}) + provider = load_location_provider(table_location="table_location", 
table_properties=EMPTY_DICT) assert provider.new_data_location(data_file_name) == f"table_location/data/{expected_hash}/{data_file_name}" From a09bcde43c40e0a582fbfeb1e971aa52278c99c5 Mon Sep 17 00:00:00 2001 From: smaheshwar-pltr Date: Mon, 13 Jan 2025 17:38:47 +0000 Subject: [PATCH 28/32] Improve `LocationProvider` unit tests (#1511) * Improve `LocationProvider` unit tests * Renamed `test_object_storage_injects_entropy` to test_object_storage_no_partition --------- Co-authored-by: Sreesh Maheshwar --- tests/table/test_locations.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tests/table/test_locations.py b/tests/table/test_locations.py index 6753fe5a26..67911b6271 100644 --- a/tests/table/test_locations.py +++ b/tests/table/test_locations.py @@ -38,12 +38,18 @@ def new_data_location(self, data_file_name: str, partition_key: Optional[Partiti return f"custom_location_provider/{data_file_name}" -def test_default_location_provider() -> None: +def test_simple_location_provider_no_partition() -> None: provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"}) assert provider.new_data_location("my_file") == "table_location/data/my_file" +def test_simple_location_provider_with_partition() -> None: + provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"}) + + assert provider.new_data_location("my_file", PARTITION_KEY) == "table_location/data/string_field=example_string/my_file" + + def test_custom_location_provider() -> None: qualified_name = CustomLocationProvider.__module__ + "." + CustomLocationProvider.__name__ provider = load_location_provider( @@ -65,7 +71,7 @@ def test_custom_location_provider_not_found() -> None: ) -def test_object_storage_injects_entropy() -> None: +def test_object_storage_no_partition() -> None: provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT) location = provider.new_data_location("test.parquet") @@ -82,19 +88,18 @@ def test_object_storage_injects_entropy() -> None: assert all(c in "01" for c in dir_name) -@pytest.mark.parametrize("object_storage", [True, False]) -def test_partition_value_in_path(object_storage: bool) -> None: +def test_object_storage_with_partition() -> None: provider = load_location_provider( table_location="table_location", - table_properties={ - "write.object-storage.enabled": str(object_storage), - }, + table_properties={"write.object-storage.enabled": "true"}, ) location = provider.new_data_location("test.parquet", PARTITION_KEY) - partition_segment = location.split("/")[-2] - assert partition_segment == "string_field=example_string" + # Partition values AND entropy included in the path. Entropy differs to that in the test below because the partition + # key AND the data file name are used as the hash input. This matches Java behaviour; the hash below is what the + # Java implementation produces for this input too. 
+ assert location == "table_location/data/0001/0010/1001/00000011/string_field=example_string/test.parquet" # NB: We test here with None partition key too because disabling partitioned paths still replaces final / with - even in From 61b3510ded32270418ad54f5204113000d3dd07f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 Jan 2025 09:03:43 +0100 Subject: [PATCH 29/32] Build: Bump mkdocs-autorefs from 1.2.0 to 1.3.0 (#1513) Bumps [mkdocs-autorefs](https://github.com/mkdocstrings/autorefs) from 1.2.0 to 1.3.0. - [Release notes](https://github.com/mkdocstrings/autorefs/releases) - [Changelog](https://github.com/mkdocstrings/autorefs/blob/main/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/autorefs/compare/1.2.0...1.3.0) --- updated-dependencies: - dependency-name: mkdocs-autorefs dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 10 +++++----- pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index 58e36274bf..b67371ecbd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2327,13 +2327,13 @@ min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-imp [[package]] name = "mkdocs-autorefs" -version = "1.2.0" +version = "1.3.0" description = "Automatically link across pages in MkDocs." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "mkdocs_autorefs-1.2.0-py3-none-any.whl", hash = "sha256:d588754ae89bd0ced0c70c06f58566a4ee43471eeeee5202427da7de9ef85a2f"}, - {file = "mkdocs_autorefs-1.2.0.tar.gz", hash = "sha256:a86b93abff653521bda71cf3fc5596342b7a23982093915cb74273f67522190f"}, + {file = "mkdocs_autorefs-1.3.0-py3-none-any.whl", hash = "sha256:d180f9778a04e78b7134e31418f238bba56f56d6a8af97873946ff661befffb3"}, + {file = "mkdocs_autorefs-1.3.0.tar.gz", hash = "sha256:6867764c099ace9025d6ac24fd07b85a98335fbd30107ef01053697c8f46db61"}, ] [package.dependencies] @@ -5357,4 +5357,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.9, !=3.9.7" -content-hash = "6879624132285053b73c134d72db38b6dace947c67788387a2042d6c78569970" +content-hash = "306213628bcc69346e14742843c8e6bccf19c2615886943c2e1482a954a388ec" diff --git a/pyproject.toml b/pyproject.toml index db84bd27f4..4b425141b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,7 +104,7 @@ jinja2 = "3.1.5" mkdocstrings = "0.27.0" mkdocstrings-python = "1.13.0" mkdocs-literate-nav = "0.6.1" -mkdocs-autorefs = "1.2.0" +mkdocs-autorefs = "1.3.0" mkdocs-gen-files = "0.5.0" mkdocs-material = "9.5.49" mkdocs-material-extensions = "1.3.1" From 4e755996c11e1768a63d3f3f663bfa77994648b7 Mon Sep 17 00:00:00 2001 From: hgollakota <43627229+hgollakota@users.noreply.github.com> Date: Wed, 15 Jan 2025 11:21:24 -0500 Subject: [PATCH 30/32] Add support for lowercase `FileFormat`(#1362) * Added support for lowercase FileFormat Modified the FileFormat class so that it utilizes EnumMeta value aliases. This allows both "AVRO" and "avro" to map to AVRO. 
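
For readers who want the alias mechanism in isolation, here is a minimal, self-contained sketch of the EnumMeta value-alias pattern this message describes. The `Format` class and its members are illustrative stand-ins, not PyIceberg code; the real change to `FileFormat` in `pyiceberg/manifest.py` appears in the diff below (and is reverted again in the following patch).

```python
from enum import Enum


class Format(str, Enum):
    """Toy enum whose members also resolve from lowercase aliases."""

    AVRO = "AVRO", "avro"
    PARQUET = "PARQUET", "parquet"
    ORC = "ORC", "orc"

    def __new__(cls, value: str, *aliases: str) -> "Format":
        obj = str.__new__(cls, value)
        obj._value_ = value
        # Register each alias so lookups like Format("avro") resolve to Format.AVRO.
        for alias in aliases:
            cls._value2member_map_[alias] = obj
        return obj


# Both the canonical value and its lowercase alias map to the same member.
assert Format("avro") is Format.AVRO
assert Format("PARQUET") is Format.PARQUET
```
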
* Make mypy happy --------- Co-authored-by: Fokko Driesprong --- pyiceberg/manifest.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index 5a32a6330c..598d88cdd8 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -94,9 +94,16 @@ def __repr__(self) -> str: class FileFormat(str, Enum): - AVRO = "AVRO" - PARQUET = "PARQUET" - ORC = "ORC" + AVRO = "AVRO", "avro" + PARQUET = "PARQUET", "parquet" + ORC = "ORC", "orc" + + def __new__(cls, value: str, *value_aliases: List[str]) -> "FileFormat": + obj = str.__new__(cls) + obj._value_ = value + for alias in value_aliases: + cls._value2member_map_[alias] = obj + return obj @classmethod def _missing_(cls, value: object) -> Union[None, str]: From 46253f353a57cb8547ef53a7d17a0161341636c0 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 15 Jan 2025 21:19:00 +0100 Subject: [PATCH 31/32] Revert "Add support for lowercase `FileFormat`(#1362)" (#1518) This reverts commit 4e755996c11e1768a63d3f3f663bfa77994648b7. --- pyiceberg/manifest.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index 598d88cdd8..5a32a6330c 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -94,16 +94,9 @@ def __repr__(self) -> str: class FileFormat(str, Enum): - AVRO = "AVRO", "avro" - PARQUET = "PARQUET", "parquet" - ORC = "ORC", "orc" - - def __new__(cls, value: str, *value_aliases: List[str]) -> "FileFormat": - obj = str.__new__(cls) - obj._value_ = value - for alias in value_aliases: - cls._value2member_map_[alias] = obj - return obj + AVRO = "AVRO" + PARQUET = "PARQUET" + ORC = "ORC" @classmethod def _missing_(cls, value: object) -> Union[None, str]: From b806cfa34dbeca89939e20e2f8f1ef467a6381e2 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 15 Jan 2025 21:32:27 +0100 Subject: [PATCH 32/32] IO: Remove deprecations (#1519) --- pyiceberg/io/__init__.py | 9 ------ pyiceberg/io/fsspec.py | 68 +++++----------------------------------- pyiceberg/io/pyarrow.py | 10 +----- 3 files changed, 9 insertions(+), 78 deletions(-) diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index 40186069d4..f322221e4b 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -48,14 +48,6 @@ logger = logging.getLogger(__name__) -ADLFS_CONNECTION_STRING = "adlfs.connection-string" -ADLFS_ACCOUNT_NAME = "adlfs.account-name" -ADLFS_ACCOUNT_KEY = "adlfs.account-key" -ADLFS_SAS_TOKEN = "adlfs.sas-token" -ADLFS_TENANT_ID = "adlfs.tenant-id" -ADLFS_CLIENT_ID = "adlfs.client-id" -ADLFS_ClIENT_SECRET = "adlfs.client-secret" -ADLFS_PREFIX = "adlfs" AWS_REGION = "client.region" AWS_ACCESS_KEY_ID = "client.access-key-id" AWS_SECRET_ACCESS_KEY = "client.secret-access-key" @@ -94,7 +86,6 @@ GCS_CACHE_TIMEOUT = "gcs.cache-timeout" GCS_REQUESTER_PAYS = "gcs.requester-pays" GCS_SESSION_KWARGS = "gcs.session-kwargs" -GCS_ENDPOINT = "gcs.endpoint" GCS_SERVICE_HOST = "gcs.service.host" GCS_DEFAULT_LOCATION = "gcs.default-bucket-location" GCS_VERSION_AWARE = "gcs.version-aware" diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 23796d4e6a..62e9b92342 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -40,13 +40,6 @@ from pyiceberg.catalog import TOKEN from pyiceberg.exceptions import SignError from pyiceberg.io import ( - ADLFS_ACCOUNT_KEY, - ADLFS_ACCOUNT_NAME, - ADLFS_CLIENT_ID, - ADLFS_CONNECTION_STRING, - ADLFS_PREFIX, - ADLFS_SAS_TOKEN, - ADLFS_TENANT_ID, 
ADLS_ACCOUNT_KEY, ADLS_ACCOUNT_NAME, ADLS_CLIENT_ID, @@ -61,7 +54,6 @@ GCS_CACHE_TIMEOUT, GCS_CONSISTENCY, GCS_DEFAULT_LOCATION, - GCS_ENDPOINT, GCS_PROJECT_ID, GCS_REQUESTER_PAYS, GCS_SERVICE_HOST, @@ -78,7 +70,6 @@ S3_SIGNER_ENDPOINT, S3_SIGNER_ENDPOINT_DEFAULT, S3_SIGNER_URI, - ADLFS_ClIENT_SECRET, ADLS_ClIENT_SECRET, FileIO, InputFile, @@ -87,7 +78,6 @@ OutputStream, ) from pyiceberg.typedef import Properties -from pyiceberg.utils.deprecated import deprecation_message from pyiceberg.utils.properties import get_first_property_value, property_as_bool logger = logging.getLogger(__name__) @@ -172,12 +162,6 @@ def _gs(properties: Properties) -> AbstractFileSystem: # https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem from gcsfs import GCSFileSystem - if properties.get(GCS_ENDPOINT): - deprecation_message( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead", - ) return GCSFileSystem( project=properties.get(GCS_PROJECT_ID), access=properties.get(GCS_ACCESS, "full_control"), @@ -186,7 +170,7 @@ def _gs(properties: Properties) -> AbstractFileSystem: cache_timeout=properties.get(GCS_CACHE_TIMEOUT), requester_pays=property_as_bool(properties, GCS_REQUESTER_PAYS, False), session_kwargs=json.loads(properties.get(GCS_SESSION_KWARGS, "{}")), - endpoint_url=get_first_property_value(properties, GCS_SERVICE_HOST, GCS_ENDPOINT), + endpoint_url=properties.get(GCS_SERVICE_HOST), default_location=properties.get(GCS_DEFAULT_LOCATION), version_aware=property_as_bool(properties, GCS_VERSION_AWARE, False), ) @@ -195,50 +179,14 @@ def _gs(properties: Properties) -> AbstractFileSystem: def _adls(properties: Properties) -> AbstractFileSystem: from adlfs import AzureBlobFileSystem - for property_name in properties: - if property_name.startswith(ADLFS_PREFIX): - deprecation_message( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message=f"The property {property_name} is deprecated. 
Please use properties that start with adls.", - ) - return AzureBlobFileSystem( - connection_string=get_first_property_value( - properties, - ADLS_CONNECTION_STRING, - ADLFS_CONNECTION_STRING, - ), - account_name=get_first_property_value( - properties, - ADLS_ACCOUNT_NAME, - ADLFS_ACCOUNT_NAME, - ), - account_key=get_first_property_value( - properties, - ADLS_ACCOUNT_KEY, - ADLFS_ACCOUNT_KEY, - ), - sas_token=get_first_property_value( - properties, - ADLS_SAS_TOKEN, - ADLFS_SAS_TOKEN, - ), - tenant_id=get_first_property_value( - properties, - ADLS_TENANT_ID, - ADLFS_TENANT_ID, - ), - client_id=get_first_property_value( - properties, - ADLS_CLIENT_ID, - ADLFS_CLIENT_ID, - ), - client_secret=get_first_property_value( - properties, - ADLS_ClIENT_SECRET, - ADLFS_ClIENT_SECRET, - ), + connection_string=properties.get(ADLS_CONNECTION_STRING), + account_name=properties.get(ADLS_ACCOUNT_NAME), + account_key=properties.get(ADLS_ACCOUNT_KEY), + sas_token=properties.get(ADLS_SAS_TOKEN), + tenant_id=properties.get(ADLS_TENANT_ID), + client_id=properties.get(ADLS_CLIENT_ID), + client_secret=properties.get(ADLS_ClIENT_SECRET), ) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 1ce0842844..d288e4f2f1 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -90,7 +90,6 @@ AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, GCS_DEFAULT_LOCATION, - GCS_ENDPOINT, GCS_SERVICE_HOST, GCS_TOKEN, GCS_TOKEN_EXPIRES_AT_MS, @@ -166,7 +165,6 @@ from pyiceberg.utils.concurrent import ExecutorFactory from pyiceberg.utils.config import Config from pyiceberg.utils.datetime import millis_to_datetime -from pyiceberg.utils.deprecated import deprecation_message from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int from pyiceberg.utils.singleton import Singleton from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string @@ -471,13 +469,7 @@ def _initialize_gcs_fs(self) -> FileSystem: gcs_kwargs["credential_token_expiration"] = millis_to_datetime(int(expiration)) if bucket_location := self.properties.get(GCS_DEFAULT_LOCATION): gcs_kwargs["default_bucket_location"] = bucket_location - if endpoint := get_first_property_value(self.properties, GCS_SERVICE_HOST, GCS_ENDPOINT): - if self.properties.get(GCS_ENDPOINT): - deprecation_message( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead", - ) + if endpoint := self.properties.get(GCS_SERVICE_HOST): url_parts = urlparse(endpoint) gcs_kwargs["scheme"] = url_parts.scheme gcs_kwargs["endpoint_override"] = url_parts.netloc