diff --git a/mkdocs/requirements.txt b/.codespellrc
similarity index 75%
rename from mkdocs/requirements.txt
rename to .codespellrc
index bf992c03a3..a38787e126 100644
--- a/mkdocs/requirements.txt
+++ b/.codespellrc
@@ -14,15 +14,5 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
-mkdocs==1.6.1
-griffe==1.5.1
-jinja2==3.1.5
-mkdocstrings==0.27.0
-mkdocstrings-python==1.12.2
-mkdocs-literate-nav==0.6.1
-mkdocs-autorefs==1.2.0
-mkdocs-gen-files==0.5.0
-mkdocs-material==9.5.49
-mkdocs-material-extensions==1.3.1
-mkdocs-section-index==0.3.9
+[codespell]
+ignore-words-list = BoundIn,fo,MoR,NotIn,notIn,oT
diff --git a/.github/workflows/python-ci-docs.yml b/.github/workflows/python-ci-docs.yml
index 19c4bb6ac1..d6e14c8400 100644
--- a/.github/workflows/python-ci-docs.yml
+++ b/.github/workflows/python-ci-docs.yml
@@ -36,12 +36,12 @@ jobs:
steps:
- uses: actions/checkout@v4
+ - name: Install poetry
+ run: make install-poetry
- uses: actions/setup-python@v5
with:
python-version: 3.12
- name: Install
- working-directory: ./mkdocs
- run: pip install -r requirements.txt
- - name: Build
- working-directory: ./mkdocs
- run: mkdocs build --strict
+ run: make docs-install
+ - name: Build docs
+ run: make docs-build
diff --git a/.github/workflows/python-release-docs.yml b/.github/workflows/python-release-docs.yml
index 2f1b1155e9..2823563fe5 100644
--- a/.github/workflows/python-release-docs.yml
+++ b/.github/workflows/python-release-docs.yml
@@ -31,15 +31,15 @@ jobs:
steps:
- uses: actions/checkout@v4
+ - name: Install poetry
+ run: make install-poetry
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
- - name: Install
- working-directory: ./mkdocs
- run: pip install -r requirements.txt
- - name: Build
- working-directory: ./mkdocs
- run: mkdocs build --strict
+ - name: Install docs
+ run: make docs-install
+ - name: Build docs
+ run: make docs-build
- name: Copy
working-directory: ./mkdocs
run: mv ./site /tmp/site
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c0b9a31792..e3dc04bde3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -28,26 +28,19 @@ repos:
- id: check-yaml
- id: check-ast
- repo: https://github.com/astral-sh/ruff-pre-commit
- # Ruff version (Used for linting)
- rev: v0.7.4
+ rev: v0.8.6
hooks:
- id: ruff
- args: [ --fix, --exit-non-zero-on-fix, --preview ]
+ args: [ --fix, --exit-non-zero-on-fix ]
- id: ruff-format
- args: [ --preview ]
- repo: https://github.com/pre-commit/mirrors-mypy
- rev: v1.8.0
+ rev: v1.14.1
hooks:
- id: mypy
args:
[--install-types, --non-interactive, --config=pyproject.toml]
- - repo: https://github.com/hadialqattan/pycln
- rev: v2.4.0
- hooks:
- - id: pycln
- args: [--config=pyproject.toml]
- repo: https://github.com/igorshubovych/markdownlint-cli
- rev: v0.42.0
+ rev: v0.43.0
hooks:
- id: markdownlint
args: ["--fix"]
@@ -69,6 +62,10 @@ repos:
# --line-length is set to a high value to deal with very long lines
- --line-length
- '99999'
+ - repo: https://github.com/codespell-project/codespell
+ rev: v2.3.0
+ hooks:
+ - id: codespell
ci:
autofix_commit_msg: |
[pre-commit.ci] auto fixes from pre-commit.com hooks
diff --git a/Makefile b/Makefile
index f2bb6f6871..b53a98da61 100644
--- a/Makefile
+++ b/Makefile
@@ -27,7 +27,7 @@ install-poetry: ## Install poetry if the user has not done that yet.
echo "Poetry is already installed."; \
fi
-install-dependencies: ## Install dependencies including dev and all extras
+install-dependencies: ## Install dependencies including dev, docs, and all extras
poetry install --all-extras
install: | install-poetry install-dependencies
@@ -97,3 +97,12 @@ clean: ## Clean up the project Python working environment
@find . -name "*.pyd" -exec echo Deleting {} \; -delete
@find . -name "*.pyo" -exec echo Deleting {} \; -delete
@echo "Cleanup complete"
+
+docs-install: ## Install the documentation dependencies
+ poetry install --with docs
+
+docs-serve: ## Serve the documentation locally
+ poetry run mkdocs serve -f mkdocs/mkdocs.yml
+
+docs-build: ## Build the documentation
+ poetry run mkdocs build -f mkdocs/mkdocs.yml --strict
diff --git a/mkdocs/README.md b/mkdocs/README.md
index e9e0462bee..271025a726 100644
--- a/mkdocs/README.md
+++ b/mkdocs/README.md
@@ -22,7 +22,6 @@ The pyiceberg docs are stored in `docs/`.
## Running docs locally
```sh
-pip3 install -r requirements.txt
-mkdocs serve
-open http://localhost:8000/
+make docs-install
+make docs-serve
```
diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md
index 7aa4159016..f1ef69b9cb 100644
--- a/mkdocs/docs/api.md
+++ b/mkdocs/docs/api.md
@@ -1005,7 +1005,7 @@ tbl.add_files(file_paths=file_paths)
## Schema evolution
-PyIceberg supports full schema evolution through the Python API. It takes care of setting the field-IDs and makes sure that only non-breaking changes are done (can be overriden).
+PyIceberg supports full schema evolution through the Python API. It takes care of setting the field-IDs and makes sure that only non-breaking changes are done (can be overridden).
In the examples below, the `.update_schema()` is called from the table itself.
@@ -1072,9 +1072,14 @@ Using `add_column` you can add a column, without having to worry about the field
with table.update_schema() as update:
update.add_column("retries", IntegerType(), "Number of retries to place the bid")
# In a struct
- update.add_column("details.confirmed_by", StringType(), "Name of the exchange")
+ update.add_column("details", StructType())
+
+with table.update_schema() as update:
+ update.add_column(("details", "confirmed_by"), StringType(), "Name of the exchange")
```
+A complex type must exist before columns can be added to it. Fields nested inside complex types are referenced with a tuple of field names.
+
### Rename column
Renaming a field in an Iceberg table is simple:
@@ -1082,20 +1087,21 @@ Renaming a field in an Iceberg table is simple:
```python
with table.update_schema() as update:
update.rename_column("retries", "num_retries")
- # This will rename `confirmed_by` to `exchange`
- update.rename_column("properties.confirmed_by", "exchange")
+ # This will rename `confirmed_by` to `processed_by` in the `details` struct
+ update.rename_column(("details", "confirmed_by"), "processed_by")
```
### Move column
-Move a field inside of struct:
+Change the order of fields:
```python
with table.update_schema() as update:
update.move_first("symbol")
+ # This will move `bid` after `ask`
update.move_after("bid", "ask")
- # This will move `confirmed_by` before `exchange`
- update.move_before("details.created_by", "details.exchange")
+ # This will move `confirmed_by` before `exchange` in the `details` struct
+ update.move_before(("details", "confirmed_by"), ("details", "exchange"))
```
### Update column
@@ -1127,6 +1133,8 @@ Delete a field, careful this is a incompatible change (readers/writers might exp
```python
with table.update_schema(allow_incompatible_changes=True) as update:
update.delete_column("some_field")
+ # In a struct
+ update.delete_column(("details", "confirmed_by"))
```
## Partition evolution
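
> Reviewer note: the api.md changes above switch nested-field addressing from dotted strings to tuples of field names. A minimal sketch (not part of the patch) pulling those calls together, assuming an already-loaded Iceberg table handle `table` whose schema contains a `details` struct with an `exchange` field:

```python
# Sketch only: tuple paths address fields nested in complex types,
# mirroring the updated api.md examples.
from pyiceberg.types import StringType

# Add a nested field to the existing `details` struct.
with table.update_schema() as update:
    update.add_column(("details", "confirmed_by"), StringType(), "Name of the exchange")

# Rename and reorder the nested field using the same tuple addressing.
with table.update_schema() as update:
    update.rename_column(("details", "confirmed_by"), "processed_by")
    update.move_before(("details", "processed_by"), ("details", "exchange"))

# Deleting a nested field is an incompatible change and must be opted into.
with table.update_schema(allow_incompatible_changes=True) as update:
    update.delete_column(("details", "processed_by"))
```
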
diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md
index 621b313613..06eaac1bed 100644
--- a/mkdocs/docs/configuration.md
+++ b/mkdocs/docs/configuration.md
@@ -102,21 +102,21 @@ For the FileIO there are several configuration options available:
-| Key | Example | Description |
-|----------------------|----------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| s3.endpoint | | Configure an alternative endpoint of the S3 service for the FileIO to access. This could be used to use S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. |
-| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. |
-| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. |
-| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. |
-| s3.role-session-name | session | An optional identifier for the assumed role session. |
-| s3.role-arn | arn:aws:... | AWS Role ARN. If provided instead of access_key and secret_key, temporary credentials will be fetched by assuming this role. |
-| s3.signer | bearer | Configure the signature version of the FileIO. |
-| s3.signer.uri | | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. |
-| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. (default : v1/aws/s3/sign). |
-| s3.region | us-west-2 | Sets the region of the bucket |
-| s3.proxy-uri | | Configure the proxy server to be used by the FileIO. |
-| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. |
-| s3.force-virtual-addressing | False | Whether to use virtual addressing of buckets. If true, then virtual addressing is always enabled. If false, then virtual addressing is only enabled if endpoint_override is empty. This can be used for non-AWS backends that only support virtual hosted-style access. |
+| Key | Example | Description |
+|----------------------|----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| s3.endpoint | | Configure an alternative endpoint of the S3 service for the FileIO to access. This could be used to use S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. |
+| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. |
+| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. |
+| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. |
+| s3.role-session-name | session | An optional identifier for the assumed role session. |
+| s3.role-arn | arn:aws:... | AWS Role ARN. If provided instead of access_key and secret_key, temporary credentials will be fetched by assuming this role. |
+| s3.signer | bearer | Configure the signature version of the FileIO. |
+| s3.signer.uri | | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. |
+| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/`. (default : v1/aws/s3/sign). |
+| s3.region | us-west-2 | Configure the default region used to initialize an `S3FileSystem`. `PyArrowFileIO` attempts to automatically resolve the region for each S3 bucket, falling back to this value if resolution fails. |
+| s3.proxy-uri | | Configure the proxy server to be used by the FileIO. |
+| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. |
+| s3.force-virtual-addressing | False | Whether to use virtual addressing of buckets. If true, then virtual addressing is always enabled. If false, then virtual addressing is only enabled if endpoint_override is empty. This can be used for non-AWS backends that only support virtual hosted-style access. |
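
> Reviewer note: the rewritten table documents the `s3.*` FileIO properties, with `s3.region` now described as a fallback used only when `PyArrowFileIO` cannot resolve a bucket's region automatically. A minimal sketch (not part of the patch; catalog name, URI, and credential values are placeholders) of how such properties are typically passed when loading a catalog:

```python
# Sketch only: FileIO properties are supplied as catalog properties;
# keys containing dots require the **{...} keyword-expansion form.
from pyiceberg.catalog import load_catalog

catalog = load_catalog(
    "default",
    **{
        "uri": "http://localhost:8181",
        "s3.endpoint": "http://localhost:9000",
        "s3.access-key-id": "admin",
        "s3.secret-access-key": "password",
        # Used only if the bucket's region cannot be resolved automatically.
        "s3.region": "us-west-2",
    },
)
```
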
diff --git a/mkdocs/docs/how-to-release.md b/mkdocs/docs/how-to-release.md
index bea5548748..c44f56a9ff 100644
--- a/mkdocs/docs/how-to-release.md
+++ b/mkdocs/docs/how-to-release.md
@@ -31,7 +31,7 @@ This guide outlines the process for releasing PyIceberg in accordance with the [
* A GPG key must be registered and published in the [Apache Iceberg KEYS file](https://downloads.apache.org/iceberg/KEYS). Follow [the instructions for setting up a GPG key and uploading it to the KEYS file](#set-up-gpg-key-and-upload-to-apache-iceberg-keys-file).
* SVN Access
- * Permission to upload artifacts to the [Apache development distribution](https://dist.apache.org/repos/dist/dev/iceberg/) (requires Apache Commmitter access).
+ * Permission to upload artifacts to the [Apache development distribution](https://dist.apache.org/repos/dist/dev/iceberg/) (requires Apache Committer access).
* Permission to upload artifacts to the [Apache release distribution](https://dist.apache.org/repos/dist/release/iceberg/) (requires Apache PMC access).
* PyPI Access
* The `twine` package must be installed for uploading releases to PyPi.
diff --git a/mkdocs/docs/verify-release.md b/mkdocs/docs/verify-release.md
index 07e4c32a86..6148bfebdb 100644
--- a/mkdocs/docs/verify-release.md
+++ b/mkdocs/docs/verify-release.md
@@ -111,7 +111,7 @@ To run the full test coverage, with both unit tests and integration tests:
make test-coverage
```
-This will spin up Docker containers to faciliate running test coverage.
+This will spin up Docker containers to facilitate running test coverage.
# Cast the vote
diff --git a/poetry.lock b/poetry.lock
index cc8b4271e6..feaba5edc3 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -25,24 +25,31 @@ tests = ["arrow", "dask[dataframe]", "docker", "pytest", "pytest-mock"]
[[package]]
name = "aiobotocore"
-version = "2.16.0"
+version = "2.17.0"
description = "Async client for aws services using botocore and aiohttp"
optional = true
python-versions = ">=3.8"
files = [
- {file = "aiobotocore-2.16.0-py3-none-any.whl", hash = "sha256:eb3641a7b9c51113adbc33a029441de6201ebb026c64ff2e149c7fa802c9abfc"},
- {file = "aiobotocore-2.16.0.tar.gz", hash = "sha256:6d6721961a81570e9b920b98778d95eec3d52a9f83b7844c6c5cfdbf2a2d6a11"},
+ {file = "aiobotocore-2.17.0-py3-none-any.whl", hash = "sha256:aedccd5368a64401233ef9f27983d3d3cb6a507a6ca981f5ec1df014c00e260e"},
+ {file = "aiobotocore-2.17.0.tar.gz", hash = "sha256:a3041333c565bff9d63b4468bee4944f2d81cff63a45b10e5cc652f3837f9cc2"},
]
[package.dependencies]
aiohttp = ">=3.9.2,<4.0.0"
aioitertools = ">=0.5.1,<1.0.0"
-botocore = ">=1.35.74,<1.35.82"
+botocore = ">=1.35.74,<1.35.94"
+jmespath = ">=0.7.1,<2.0.0"
+multidict = ">=6.0.0,<7.0.0"
+python-dateutil = ">=2.1,<3.0.0"
+urllib3 = [
+ {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""},
+ {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""},
+]
wrapt = ">=1.10.10,<2.0.0"
[package.extras]
-awscli = ["awscli (>=1.36.15,<1.36.23)"]
-boto3 = ["boto3 (>=1.35.74,<1.35.82)"]
+awscli = ["awscli (>=1.36.15,<1.36.35)"]
+boto3 = ["boto3 (>=1.35.74,<1.35.94)"]
[[package]]
name = "aiohappyeyeballs"
@@ -185,6 +192,17 @@ files = [
[package.dependencies]
frozenlist = ">=1.1.0"
+[[package]]
+name = "alabaster"
+version = "0.7.16"
+description = "A light, configurable Sphinx theme"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92"},
+ {file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"},
+]
+
[[package]]
name = "annotated-types"
version = "0.7.0"
@@ -345,6 +363,35 @@ typing-extensions = ">=4.6.0"
[package.extras]
aio = ["azure-core[aio] (>=1.30.0)"]
+[[package]]
+name = "babel"
+version = "2.16.0"
+description = "Internationalization utilities"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b"},
+ {file = "babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316"},
+]
+
+[package.extras]
+dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"]
+
+[[package]]
+name = "backports-tarfile"
+version = "1.2.0"
+description = "Backport of CPython tarfile module"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"},
+ {file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"},
+]
+
+[package.extras]
+docs = ["furo", "jaraco.packaging (>=9.3)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+testing = ["jaraco.test", "pytest (!=8.0.*)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)"]
+
[[package]]
name = "blinker"
version = "1.9.0"
@@ -358,17 +405,17 @@ files = [
[[package]]
name = "boto3"
-version = "1.35.81"
+version = "1.35.93"
description = "The AWS SDK for Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "boto3-1.35.81-py3-none-any.whl", hash = "sha256:742941b2424c0223d2d94a08c3485462fa7c58d816b62ca80f08e555243acee1"},
- {file = "boto3-1.35.81.tar.gz", hash = "sha256:d2e95fa06f095b8e0c545dd678c6269d253809b2997c30f5ce8a956c410b4e86"},
+ {file = "boto3-1.35.93-py3-none-any.whl", hash = "sha256:7de2c44c960e486f3c57e5203ea6393c6c4f0914c5f81c789ceb8b5d2ba5d1c5"},
+ {file = "boto3-1.35.93.tar.gz", hash = "sha256:2446e819cf4e295833474cdcf2c92bc82718ce537e9ee1f17f7e3d237f60e69b"},
]
[package.dependencies]
-botocore = ">=1.35.81,<1.36.0"
+botocore = ">=1.35.93,<1.36.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.10.0,<0.11.0"
@@ -377,13 +424,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
[[package]]
name = "botocore"
-version = "1.35.81"
+version = "1.35.93"
description = "Low-level, data-driven core of boto 3."
optional = false
python-versions = ">=3.8"
files = [
- {file = "botocore-1.35.81-py3-none-any.whl", hash = "sha256:a7b13bbd959bf2d6f38f681676aab408be01974c46802ab997617b51399239f7"},
- {file = "botocore-1.35.81.tar.gz", hash = "sha256:564c2478e50179e0b766e6a87e5e0cdd35e1bc37eb375c1cf15511f5dd13600d"},
+ {file = "botocore-1.35.93-py3-none-any.whl", hash = "sha256:47f7161000af6036f806449e3de12acdd3ec11aac7f5578e43e96241413a0f8f"},
+ {file = "botocore-1.35.93.tar.gz", hash = "sha256:b8d245a01e7d64c41edcf75a42be158df57b9518a83a3dbf5c7e4b8c2bc540cc"},
]
[package.dependencies]
@@ -414,6 +461,7 @@ importlib-metadata = {version = ">=4.6", markers = "python_full_version < \"3.10
packaging = ">=19.1"
pyproject_hooks = "*"
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+virtualenv = {version = ">=20.0.35", optional = true, markers = "extra == \"virtualenv\""}
[package.extras]
docs = ["furo (>=2023.08.17)", "sphinx (>=7.0,<8.0)", "sphinx-argparse-cli (>=1.5)", "sphinx-autodoc-typehints (>=1.10)", "sphinx-issues (>=3.0.0)"]
@@ -701,73 +749,73 @@ files = [
[[package]]
name = "coverage"
-version = "7.6.9"
+version = "7.6.10"
description = "Code coverage measurement for Python"
optional = false
python-versions = ">=3.9"
files = [
- {file = "coverage-7.6.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:85d9636f72e8991a1706b2b55b06c27545448baf9f6dbf51c4004609aacd7dcb"},
- {file = "coverage-7.6.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:608a7fd78c67bee8936378299a6cb9f5149bb80238c7a566fc3e6717a4e68710"},
- {file = "coverage-7.6.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96d636c77af18b5cb664ddf12dab9b15a0cfe9c0bde715da38698c8cea748bfa"},
- {file = "coverage-7.6.9-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d75cded8a3cff93da9edc31446872d2997e327921d8eed86641efafd350e1df1"},
- {file = "coverage-7.6.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7b15f589593110ae767ce997775d645b47e5cbbf54fd322f8ebea6277466cec"},
- {file = "coverage-7.6.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:44349150f6811b44b25574839b39ae35291f6496eb795b7366fef3bd3cf112d3"},
- {file = "coverage-7.6.9-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:d891c136b5b310d0e702e186d70cd16d1119ea8927347045124cb286b29297e5"},
- {file = "coverage-7.6.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:db1dab894cc139f67822a92910466531de5ea6034ddfd2b11c0d4c6257168073"},
- {file = "coverage-7.6.9-cp310-cp310-win32.whl", hash = "sha256:41ff7b0da5af71a51b53f501a3bac65fb0ec311ebed1632e58fc6107f03b9198"},
- {file = "coverage-7.6.9-cp310-cp310-win_amd64.whl", hash = "sha256:35371f8438028fdccfaf3570b31d98e8d9eda8bb1d6ab9473f5a390969e98717"},
- {file = "coverage-7.6.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:932fc826442132dde42ee52cf66d941f581c685a6313feebed358411238f60f9"},
- {file = "coverage-7.6.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:085161be5f3b30fd9b3e7b9a8c301f935c8313dcf928a07b116324abea2c1c2c"},
- {file = "coverage-7.6.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ccc660a77e1c2bf24ddbce969af9447a9474790160cfb23de6be4fa88e3951c7"},
- {file = "coverage-7.6.9-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c69e42c892c018cd3c8d90da61d845f50a8243062b19d228189b0224150018a9"},
- {file = "coverage-7.6.9-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0824a28ec542a0be22f60c6ac36d679e0e262e5353203bea81d44ee81fe9c6d4"},
- {file = "coverage-7.6.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4401ae5fc52ad8d26d2a5d8a7428b0f0c72431683f8e63e42e70606374c311a1"},
- {file = "coverage-7.6.9-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:98caba4476a6c8d59ec1eb00c7dd862ba9beca34085642d46ed503cc2d440d4b"},
- {file = "coverage-7.6.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ee5defd1733fd6ec08b168bd4f5387d5b322f45ca9e0e6c817ea6c4cd36313e3"},
- {file = "coverage-7.6.9-cp311-cp311-win32.whl", hash = "sha256:f2d1ec60d6d256bdf298cb86b78dd715980828f50c46701abc3b0a2b3f8a0dc0"},
- {file = "coverage-7.6.9-cp311-cp311-win_amd64.whl", hash = "sha256:0d59fd927b1f04de57a2ba0137166d31c1a6dd9e764ad4af552912d70428c92b"},
- {file = "coverage-7.6.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:99e266ae0b5d15f1ca8d278a668df6f51cc4b854513daab5cae695ed7b721cf8"},
- {file = "coverage-7.6.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9901d36492009a0a9b94b20e52ebfc8453bf49bb2b27bca2c9706f8b4f5a554a"},
- {file = "coverage-7.6.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abd3e72dd5b97e3af4246cdada7738ef0e608168de952b837b8dd7e90341f015"},
- {file = "coverage-7.6.9-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff74026a461eb0660366fb01c650c1d00f833a086b336bdad7ab00cc952072b3"},
- {file = "coverage-7.6.9-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65dad5a248823a4996724a88eb51d4b31587aa7aa428562dbe459c684e5787ae"},
- {file = "coverage-7.6.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:22be16571504c9ccea919fcedb459d5ab20d41172056206eb2994e2ff06118a4"},
- {file = "coverage-7.6.9-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f957943bc718b87144ecaee70762bc2bc3f1a7a53c7b861103546d3a403f0a6"},
- {file = "coverage-7.6.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ae1387db4aecb1f485fb70a6c0148c6cdaebb6038f1d40089b1fc84a5db556f"},
- {file = "coverage-7.6.9-cp312-cp312-win32.whl", hash = "sha256:1a330812d9cc7ac2182586f6d41b4d0fadf9be9049f350e0efb275c8ee8eb692"},
- {file = "coverage-7.6.9-cp312-cp312-win_amd64.whl", hash = "sha256:b12c6b18269ca471eedd41c1b6a1065b2f7827508edb9a7ed5555e9a56dcfc97"},
- {file = "coverage-7.6.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:899b8cd4781c400454f2f64f7776a5d87bbd7b3e7f7bda0cb18f857bb1334664"},
- {file = "coverage-7.6.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:61f70dc68bd36810972e55bbbe83674ea073dd1dcc121040a08cdf3416c5349c"},
- {file = "coverage-7.6.9-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a289d23d4c46f1a82d5db4abeb40b9b5be91731ee19a379d15790e53031c014"},
- {file = "coverage-7.6.9-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e216d8044a356fc0337c7a2a0536d6de07888d7bcda76febcb8adc50bdbbd00"},
- {file = "coverage-7.6.9-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c026eb44f744acaa2bda7493dad903aa5bf5fc4f2554293a798d5606710055d"},
- {file = "coverage-7.6.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e77363e8425325384f9d49272c54045bbed2f478e9dd698dbc65dbc37860eb0a"},
- {file = "coverage-7.6.9-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:777abfab476cf83b5177b84d7486497e034eb9eaea0d746ce0c1268c71652077"},
- {file = "coverage-7.6.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:447af20e25fdbe16f26e84eb714ba21d98868705cb138252d28bc400381f6ffb"},
- {file = "coverage-7.6.9-cp313-cp313-win32.whl", hash = "sha256:d872ec5aeb086cbea771c573600d47944eea2dcba8be5f3ee649bfe3cb8dc9ba"},
- {file = "coverage-7.6.9-cp313-cp313-win_amd64.whl", hash = "sha256:fd1213c86e48dfdc5a0cc676551db467495a95a662d2396ecd58e719191446e1"},
- {file = "coverage-7.6.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ba9e7484d286cd5a43744e5f47b0b3fb457865baf07bafc6bee91896364e1419"},
- {file = "coverage-7.6.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e5ea1cf0872ee455c03e5674b5bca5e3e68e159379c1af0903e89f5eba9ccc3a"},
- {file = "coverage-7.6.9-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d10e07aa2b91835d6abec555ec8b2733347956991901eea6ffac295f83a30e4"},
- {file = "coverage-7.6.9-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:13a9e2d3ee855db3dd6ea1ba5203316a1b1fd8eaeffc37c5b54987e61e4194ae"},
- {file = "coverage-7.6.9-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c38bf15a40ccf5619fa2fe8f26106c7e8e080d7760aeccb3722664c8656b030"},
- {file = "coverage-7.6.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d5275455b3e4627c8e7154feaf7ee0743c2e7af82f6e3b561967b1cca755a0be"},
- {file = "coverage-7.6.9-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8f8770dfc6e2c6a2d4569f411015c8d751c980d17a14b0530da2d7f27ffdd88e"},
- {file = "coverage-7.6.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8d2dfa71665a29b153a9681edb1c8d9c1ea50dfc2375fb4dac99ea7e21a0bcd9"},
- {file = "coverage-7.6.9-cp313-cp313t-win32.whl", hash = "sha256:5e6b86b5847a016d0fbd31ffe1001b63355ed309651851295315031ea7eb5a9b"},
- {file = "coverage-7.6.9-cp313-cp313t-win_amd64.whl", hash = "sha256:97ddc94d46088304772d21b060041c97fc16bdda13c6c7f9d8fcd8d5ae0d8611"},
- {file = "coverage-7.6.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:adb697c0bd35100dc690de83154627fbab1f4f3c0386df266dded865fc50a902"},
- {file = "coverage-7.6.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:be57b6d56e49c2739cdf776839a92330e933dd5e5d929966fbbd380c77f060be"},
- {file = "coverage-7.6.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1592791f8204ae9166de22ba7e6705fa4ebd02936c09436a1bb85aabca3e599"},
- {file = "coverage-7.6.9-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4e12ae8cc979cf83d258acb5e1f1cf2f3f83524d1564a49d20b8bec14b637f08"},
- {file = "coverage-7.6.9-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb5555cff66c4d3d6213a296b360f9e1a8e323e74e0426b6c10ed7f4d021e464"},
- {file = "coverage-7.6.9-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:b9389a429e0e5142e69d5bf4a435dd688c14478a19bb901735cdf75e57b13845"},
- {file = "coverage-7.6.9-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:592ac539812e9b46046620341498caf09ca21023c41c893e1eb9dbda00a70cbf"},
- {file = "coverage-7.6.9-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a27801adef24cc30871da98a105f77995e13a25a505a0161911f6aafbd66e678"},
- {file = "coverage-7.6.9-cp39-cp39-win32.whl", hash = "sha256:8e3c3e38930cfb729cb8137d7f055e5a473ddaf1217966aa6238c88bd9fd50e6"},
- {file = "coverage-7.6.9-cp39-cp39-win_amd64.whl", hash = "sha256:e28bf44afa2b187cc9f41749138a64435bf340adfcacb5b2290c070ce99839d4"},
- {file = "coverage-7.6.9-pp39.pp310-none-any.whl", hash = "sha256:f3ca78518bc6bc92828cd11867b121891d75cae4ea9e908d72030609b996db1b"},
- {file = "coverage-7.6.9.tar.gz", hash = "sha256:4a8d8977b0c6ef5aeadcb644da9e69ae0dcfe66ec7f368c89c72e058bd71164d"},
+ {file = "coverage-7.6.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5c912978f7fbf47ef99cec50c4401340436d200d41d714c7a4766f377c5b7b78"},
+ {file = "coverage-7.6.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a01ec4af7dfeb96ff0078ad9a48810bb0cc8abcb0115180c6013a6b26237626c"},
+ {file = "coverage-7.6.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3b204c11e2b2d883946fe1d97f89403aa1811df28ce0447439178cc7463448a"},
+ {file = "coverage-7.6.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32ee6d8491fcfc82652a37109f69dee9a830e9379166cb73c16d8dc5c2915165"},
+ {file = "coverage-7.6.10-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675cefc4c06e3b4c876b85bfb7c59c5e2218167bbd4da5075cbe3b5790a28988"},
+ {file = "coverage-7.6.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f4f620668dbc6f5e909a0946a877310fb3d57aea8198bde792aae369ee1c23b5"},
+ {file = "coverage-7.6.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:4eea95ef275de7abaef630c9b2c002ffbc01918b726a39f5a4353916ec72d2f3"},
+ {file = "coverage-7.6.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e2f0280519e42b0a17550072861e0bc8a80a0870de260f9796157d3fca2733c5"},
+ {file = "coverage-7.6.10-cp310-cp310-win32.whl", hash = "sha256:bc67deb76bc3717f22e765ab3e07ee9c7a5e26b9019ca19a3b063d9f4b874244"},
+ {file = "coverage-7.6.10-cp310-cp310-win_amd64.whl", hash = "sha256:0f460286cb94036455e703c66988851d970fdfd8acc2a1122ab7f4f904e4029e"},
+ {file = "coverage-7.6.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ea3c8f04b3e4af80e17bab607c386a830ffc2fb88a5484e1df756478cf70d1d3"},
+ {file = "coverage-7.6.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:507a20fc863cae1d5720797761b42d2d87a04b3e5aeb682ef3b7332e90598f43"},
+ {file = "coverage-7.6.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d37a84878285b903c0fe21ac8794c6dab58150e9359f1aaebbeddd6412d53132"},
+ {file = "coverage-7.6.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a534738b47b0de1995f85f582d983d94031dffb48ab86c95bdf88dc62212142f"},
+ {file = "coverage-7.6.10-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d7a2bf79378d8fb8afaa994f91bfd8215134f8631d27eba3e0e2c13546ce994"},
+ {file = "coverage-7.6.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6713ba4b4ebc330f3def51df1d5d38fad60b66720948112f114968feb52d3f99"},
+ {file = "coverage-7.6.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab32947f481f7e8c763fa2c92fd9f44eeb143e7610c4ca9ecd6a36adab4081bd"},
+ {file = "coverage-7.6.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7bbd8c8f1b115b892e34ba66a097b915d3871db7ce0e6b9901f462ff3a975377"},
+ {file = "coverage-7.6.10-cp311-cp311-win32.whl", hash = "sha256:299e91b274c5c9cdb64cbdf1b3e4a8fe538a7a86acdd08fae52301b28ba297f8"},
+ {file = "coverage-7.6.10-cp311-cp311-win_amd64.whl", hash = "sha256:489a01f94aa581dbd961f306e37d75d4ba16104bbfa2b0edb21d29b73be83609"},
+ {file = "coverage-7.6.10-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:27c6e64726b307782fa5cbe531e7647aee385a29b2107cd87ba7c0105a5d3853"},
+ {file = "coverage-7.6.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c56e097019e72c373bae32d946ecf9858fda841e48d82df7e81c63ac25554078"},
+ {file = "coverage-7.6.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7827a5bc7bdb197b9e066cdf650b2887597ad124dd99777332776f7b7c7d0d0"},
+ {file = "coverage-7.6.10-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:204a8238afe787323a8b47d8be4df89772d5c1e4651b9ffa808552bdf20e1d50"},
+ {file = "coverage-7.6.10-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e67926f51821b8e9deb6426ff3164870976fe414d033ad90ea75e7ed0c2e5022"},
+ {file = "coverage-7.6.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e78b270eadb5702938c3dbe9367f878249b5ef9a2fcc5360ac7bff694310d17b"},
+ {file = "coverage-7.6.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:714f942b9c15c3a7a5fe6876ce30af831c2ad4ce902410b7466b662358c852c0"},
+ {file = "coverage-7.6.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:abb02e2f5a3187b2ac4cd46b8ced85a0858230b577ccb2c62c81482ca7d18852"},
+ {file = "coverage-7.6.10-cp312-cp312-win32.whl", hash = "sha256:55b201b97286cf61f5e76063f9e2a1d8d2972fc2fcfd2c1272530172fd28c359"},
+ {file = "coverage-7.6.10-cp312-cp312-win_amd64.whl", hash = "sha256:e4ae5ac5e0d1e4edfc9b4b57b4cbecd5bc266a6915c500f358817a8496739247"},
+ {file = "coverage-7.6.10-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05fca8ba6a87aabdd2d30d0b6c838b50510b56cdcfc604d40760dae7153b73d9"},
+ {file = "coverage-7.6.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9e80eba8801c386f72e0712a0453431259c45c3249f0009aff537a517b52942b"},
+ {file = "coverage-7.6.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a372c89c939d57abe09e08c0578c1d212e7a678135d53aa16eec4430adc5e690"},
+ {file = "coverage-7.6.10-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec22b5e7fe7a0fa8509181c4aac1db48f3dd4d3a566131b313d1efc102892c18"},
+ {file = "coverage-7.6.10-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26bcf5c4df41cad1b19c84af71c22cbc9ea9a547fc973f1f2cc9a290002c8b3c"},
+ {file = "coverage-7.6.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e4630c26b6084c9b3cb53b15bd488f30ceb50b73c35c5ad7871b869cb7365fd"},
+ {file = "coverage-7.6.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2396e8116db77789f819d2bc8a7e200232b7a282c66e0ae2d2cd84581a89757e"},
+ {file = "coverage-7.6.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79109c70cc0882e4d2d002fe69a24aa504dec0cc17169b3c7f41a1d341a73694"},
+ {file = "coverage-7.6.10-cp313-cp313-win32.whl", hash = "sha256:9e1747bab246d6ff2c4f28b4d186b205adced9f7bd9dc362051cc37c4a0c7bd6"},
+ {file = "coverage-7.6.10-cp313-cp313-win_amd64.whl", hash = "sha256:254f1a3b1eef5f7ed23ef265eaa89c65c8c5b6b257327c149db1ca9d4a35f25e"},
+ {file = "coverage-7.6.10-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2ccf240eb719789cedbb9fd1338055de2761088202a9a0b73032857e53f612fe"},
+ {file = "coverage-7.6.10-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0c807ca74d5a5e64427c8805de15b9ca140bba13572d6d74e262f46f50b13273"},
+ {file = "coverage-7.6.10-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2bcfa46d7709b5a7ffe089075799b902020b62e7ee56ebaed2f4bdac04c508d8"},
+ {file = "coverage-7.6.10-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4e0de1e902669dccbf80b0415fb6b43d27edca2fbd48c74da378923b05316098"},
+ {file = "coverage-7.6.10-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7b444c42bbc533aaae6b5a2166fd1a797cdb5eb58ee51a92bee1eb94a1e1cb"},
+ {file = "coverage-7.6.10-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b330368cb99ef72fcd2dc3ed260adf67b31499584dc8a20225e85bfe6f6cfed0"},
+ {file = "coverage-7.6.10-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:9a7cfb50515f87f7ed30bc882f68812fd98bc2852957df69f3003d22a2aa0abf"},
+ {file = "coverage-7.6.10-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f93531882a5f68c28090f901b1d135de61b56331bba82028489bc51bdd818d2"},
+ {file = "coverage-7.6.10-cp313-cp313t-win32.whl", hash = "sha256:89d76815a26197c858f53c7f6a656686ec392b25991f9e409bcef020cd532312"},
+ {file = "coverage-7.6.10-cp313-cp313t-win_amd64.whl", hash = "sha256:54a5f0f43950a36312155dae55c505a76cd7f2b12d26abeebbe7a0b36dbc868d"},
+ {file = "coverage-7.6.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:656c82b8a0ead8bba147de9a89bda95064874c91a3ed43a00e687f23cc19d53a"},
+ {file = "coverage-7.6.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ccc2b70a7ed475c68ceb548bf69cec1e27305c1c2606a5eb7c3afff56a1b3b27"},
+ {file = "coverage-7.6.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5e37dc41d57ceba70956fa2fc5b63c26dba863c946ace9705f8eca99daecdc4"},
+ {file = "coverage-7.6.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0aa9692b4fdd83a4647eeb7db46410ea1322b5ed94cd1715ef09d1d5922ba87f"},
+ {file = "coverage-7.6.10-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa744da1820678b475e4ba3dfd994c321c5b13381d1041fe9c608620e6676e25"},
+ {file = "coverage-7.6.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c0b1818063dc9e9d838c09e3a473c1422f517889436dd980f5d721899e66f315"},
+ {file = "coverage-7.6.10-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:59af35558ba08b758aec4d56182b222976330ef8d2feacbb93964f576a7e7a90"},
+ {file = "coverage-7.6.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7ed2f37cfce1ce101e6dffdfd1c99e729dd2ffc291d02d3e2d0af8b53d13840d"},
+ {file = "coverage-7.6.10-cp39-cp39-win32.whl", hash = "sha256:4bcc276261505d82f0ad426870c3b12cb177752834a633e737ec5ee79bbdff18"},
+ {file = "coverage-7.6.10-cp39-cp39-win_amd64.whl", hash = "sha256:457574f4599d2b00f7f637a0700a6422243b3565509457b2dbd3f50703e11f59"},
+ {file = "coverage-7.6.10-pp39.pp310-none-any.whl", hash = "sha256:fd34e7b3405f0cc7ab03d54a334c17a9e802897580d964bd8c2001f4b9fd488f"},
+ {file = "coverage-7.6.10.tar.gz", hash = "sha256:7fb105327c8f8f0682e29843e2ff96af9dcbe5bab8eeb4b398c6a33a16d80a23"},
]
[package.dependencies]
@@ -1015,27 +1063,27 @@ files = [
[[package]]
name = "deptry"
-version = "0.21.2"
+version = "0.22.0"
description = "A command line utility to check for unused, missing and transitive dependencies in a Python project."
optional = false
python-versions = ">=3.9"
files = [
- {file = "deptry-0.21.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e3b9e0c5ee437240b65e61107b5777a12064f78f604bf9f181a96c9b56eb896d"},
- {file = "deptry-0.21.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:d76bbf48bd62ecc44ca3d414769bd4b7956598d23d9ccb42fd359b831a31cab2"},
- {file = "deptry-0.21.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3080bb88c16ebd35f59cba7688416115b7aaf4630dc5a051dff2649cbf129a1b"},
- {file = "deptry-0.21.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adb12d6678fb5dbd320a0a2e37881059d0a45bec6329df4250c977d803fe7f96"},
- {file = "deptry-0.21.2-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:7479d3079be69c3bbf5913d8e21090749c1139ee91f81520ffce90b5322476b0"},
- {file = "deptry-0.21.2-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:019167b35301edd2bdd4719c8b8f44769be4507cb8a1cd46fff4393cdbe8d31b"},
- {file = "deptry-0.21.2-cp39-abi3-win_amd64.whl", hash = "sha256:d8add495f0dd19a38aa6d1e09b14b1441bca47c9d945bc7b322efb084313eea3"},
- {file = "deptry-0.21.2-cp39-abi3-win_arm64.whl", hash = "sha256:06d48e9fa460aad02f9e1b079d9f5a69d622d291b3a0525b722fc91c88032042"},
- {file = "deptry-0.21.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3ef8aed33a2eac357f9565063bc1257bcefa03a37038299c08a4222e28f3cd34"},
- {file = "deptry-0.21.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:917745db5f8295eb5048e43d9073a9a675ffdba865e9b294d2e7aa455730cb06"},
- {file = "deptry-0.21.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:186ddbc69c1f70e684e83e202795e1054d0c2dfc03b8acc077f65dc3b6a7f4ce"},
- {file = "deptry-0.21.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3686e86ad7063b5a6e5253454f9d9e4a7a6b1511a99bd4306fda5424480be48"},
- {file = "deptry-0.21.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:1012a88500f242489066f811f6ec0c93328d9340bbf0f87f0c7d2146054d197e"},
- {file = "deptry-0.21.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:769bb658172586d1b03046bdc6b6c94f6a98ecfbac04ff7f77ec61768c75e1c2"},
- {file = "deptry-0.21.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fb2f43747b58abeec01dc277ef22859342f3bca2ac677818c94940a009b436c0"},
- {file = "deptry-0.21.2.tar.gz", hash = "sha256:4e870553c7a1fafcd99a83ba4137259525679eecabeff61bc669741efa201541"},
+ {file = "deptry-0.22.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:2b903c94162e30640bb7a3e6800c7afd03a6bb12b693a21290e06c713dba35af"},
+ {file = "deptry-0.22.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:8b523a33bed952679c97a9f55c690803f0fbeb32649946dcc1362c3f015897c7"},
+ {file = "deptry-0.22.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c68fa570be1443888d252c6f551356777e56e82e492e68e6db3d65b31100c450"},
+ {file = "deptry-0.22.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:016f8a5b6c32762beea47a4d9d2d7b04f1b6e534448e5444c7a742bd2fdb260d"},
+ {file = "deptry-0.22.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:46c868a0493556b41096f9824a15a3ce38811e6b4a2699ebec16e06e9f85cd84"},
+ {file = "deptry-0.22.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:aebba0d1ca119f6241ff0d5b72e72a9b912fa880e81f4ab346a32d9001d6ddb1"},
+ {file = "deptry-0.22.0-cp39-abi3-win_amd64.whl", hash = "sha256:2da497a9888f930b5c86c6524b29a4d284ed320edd4148ecc2e45e10f177f4fe"},
+ {file = "deptry-0.22.0-cp39-abi3-win_arm64.whl", hash = "sha256:35acf2ac783ba2ec43ba593ba14e0080393c0ab24797ba55fbed30f0ba02259f"},
+ {file = "deptry-0.22.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9db9d0b8244e2b20bd75a21312c35ee628a602b00c0e2f267fb90f4600de6d2d"},
+ {file = "deptry-0.22.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:edd0060065325cd70e6ce47feaa724cdb7fc3f4de673e4ed0fa38e8c1adc4155"},
+ {file = "deptry-0.22.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b371a3c3194c2db9196ab1f80d5ce08138dea731eff8dd9fb2997da42941fa7"},
+ {file = "deptry-0.22.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e20a8ba89078d06440316dba719c2278fdb19923e76633b808fd1b5670020c4"},
+ {file = "deptry-0.22.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f4872f48225d1e7dbacb1be5e427945c8f76abf6b91453e038aae076b638ba01"},
+ {file = "deptry-0.22.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:9a12ebe86299e7bb054804464467f33c49e5a34f204b710fa10fbe1f31c56964"},
+ {file = "deptry-0.22.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fbe6211b972337acdeec6c11a82b666597c1edd6c6e2a93eb705bf49644bfb08"},
+ {file = "deptry-0.22.0.tar.gz", hash = "sha256:32212cd40562f71b24da69babaed9a4233c567da390f681d86bb66f8ec4d2bfe"},
]
[package.dependencies]
@@ -1089,6 +1137,25 @@ files = [
{file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"},
]
+[[package]]
+name = "domdf-python-tools"
+version = "3.9.0"
+description = "Helpful functions for Pythonβπβπ οΈ"
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "domdf_python_tools-3.9.0-py3-none-any.whl", hash = "sha256:4e1ef365cbc24627d6d1e90cf7d46d8ab8df967e1237f4a26885f6986c78872e"},
+ {file = "domdf_python_tools-3.9.0.tar.gz", hash = "sha256:1f8a96971178333a55e083e35610d7688cd7620ad2b99790164e1fc1a3614c18"},
+]
+
+[package.dependencies]
+natsort = ">=7.0.1"
+typing-extensions = ">=3.7.4.1"
+
+[package.extras]
+all = ["pytz (>=2019.1)"]
+dates = ["pytz (>=2019.1)"]
+
[[package]]
name = "duckdb"
version = "1.1.3"
@@ -1429,17 +1496,17 @@ gcsfuse = ["fusepy"]
[[package]]
name = "getdaft"
-version = "0.4.1"
+version = "0.4.2"
description = "Distributed Dataframes for Multimodal Data"
optional = true
python-versions = ">=3.9"
files = [
- {file = "getdaft-0.4.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:04b91c019be87415138edfa61c379174a49760c4474c60eb37b1c24ae010a7d5"},
- {file = "getdaft-0.4.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:6254f33b5292b3198b6a0e4fdd0d2f568ff624930203d9af75bbc3b7e40e8c0b"},
- {file = "getdaft-0.4.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a642f786175f543cb0d2dc585577c554b135f5ac2e7b34bfbe359dd86adbdbae"},
- {file = "getdaft-0.4.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c1e1b0c283e0efc5102dea04db9a98bad6bcf36829a6c3d6cd511e8805514c0"},
- {file = "getdaft-0.4.1-cp39-abi3-win_amd64.whl", hash = "sha256:46985b2ec980134b97d3b8e95becd2b654cb74e2952d7b24b6f3b55d28d16de2"},
- {file = "getdaft-0.4.1.tar.gz", hash = "sha256:d3ad8b11b06bbf25b62a091444917593933ff53c39fb4a8abca8cbc6dde3b917"},
+ {file = "getdaft-0.4.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3760e69e66e571dbb42ad354954bd52d3ce8eafdfc93c9bdaf2c1ed42017808e"},
+ {file = "getdaft-0.4.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:2b1c072f69663b87e4f3aa926cf7441d1d150fe46a6d2b32c8b01f72a237680b"},
+ {file = "getdaft-0.4.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0e6450fd90743bd981575dc3a1b6694fe1e4a9fe2fc31ea5ad1ca92e1dabef2"},
+ {file = "getdaft-0.4.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0852c71f81e1ff4fffd60ee7542ff325d1e93ec857adff8c26494a0188dc79ae"},
+ {file = "getdaft-0.4.2-cp39-abi3-win_amd64.whl", hash = "sha256:687031e101dd4df151f387cc8a2a60bfc6bda640d4deb2d3a74a4f742eb57edf"},
+ {file = "getdaft-0.4.2.tar.gz", hash = "sha256:9d253a5dce0ee798be9737ef1da60f313235fd459b4ff3b48e6aafe30538ff21"},
]
[package.dependencies]
@@ -1461,6 +1528,23 @@ ray = ["packaging", "ray[client,data] (>=2.0.0)", "ray[client,data] (>=2.10.0)"]
sql = ["connectorx", "sqlalchemy", "sqlglot"]
unity = ["unitycatalog"]
+[[package]]
+name = "ghp-import"
+version = "2.1.0"
+description = "Copy your docs directly to the gh-pages branch."
+optional = false
+python-versions = "*"
+files = [
+ {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"},
+ {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"},
+]
+
+[package.dependencies]
+python-dateutil = ">=2.8.1"
+
+[package.extras]
+dev = ["flake8", "markdown", "twine", "wheel"]
+
[[package]]
name = "google-api-core"
version = "2.24.0"
@@ -1745,6 +1829,20 @@ files = [
docs = ["Sphinx", "furo"]
test = ["objgraph", "psutil"]
+[[package]]
+name = "griffe"
+version = "1.5.4"
+description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API."
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "griffe-1.5.4-py3-none-any.whl", hash = "sha256:ed33af890586a5bebc842fcb919fc694b3dc1bc55b7d9e0228de41ce566b4a1d"},
+ {file = "griffe-1.5.4.tar.gz", hash = "sha256:073e78ad3e10c8378c2f798bd4ef87b92d8411e9916e157fd366a17cc4fd4e52"},
+]
+
+[package.dependencies]
+colorama = ">=0.4"
+
[[package]]
name = "identify"
version = "2.6.3"
@@ -1773,6 +1871,17 @@ files = [
[package.extras]
all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
+[[package]]
+name = "imagesize"
+version = "1.4.1"
+description = "Getting image size from png/jpeg/jpeg2000/gif file"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+files = [
+ {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"},
+ {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"},
+]
+
[[package]]
name = "importlib-metadata"
version = "8.5.0"
@@ -1829,6 +1938,45 @@ files = [
{file = "itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173"},
]
+[[package]]
+name = "jaraco-context"
+version = "6.0.1"
+description = "Useful decorators and context managers"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "jaraco.context-6.0.1-py3-none-any.whl", hash = "sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4"},
+ {file = "jaraco_context-6.0.1.tar.gz", hash = "sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3"},
+]
+
+[package.dependencies]
+"backports.tarfile" = {version = "*", markers = "python_version < \"3.12\""}
+
+[package.extras]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+test = ["portend", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
+
+[[package]]
+name = "jaraco-packaging"
+version = "10.2.3"
+description = "tools to supplement packaging Python releases"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "jaraco.packaging-10.2.3-py3-none-any.whl", hash = "sha256:ceb5806d2ac5731ba5b265d196e4cb848afa2a958f01d0bf3a1dfaa3969ed92c"},
+ {file = "jaraco_packaging-10.2.3.tar.gz", hash = "sha256:d726cc42faa62b2f70585cbe1176b4b469fe6d75f21b19034b688b4340917933"},
+]
+
+[package.dependencies]
+build = {version = "*", extras = ["virtualenv"]}
+domdf-python-tools = "*"
+"jaraco.context" = "*"
+sphinx = "*"
+
+[package.extras]
+doc = ["furo", "jaraco.packaging (>=9.3)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+test = ["pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "types-docutils"]
+
[[package]]
name = "jinja2"
version = "3.1.5"
@@ -1874,23 +2022,6 @@ cryptography = "*"
[package.extras]
drafts = ["pycryptodome"]
-[[package]]
-name = "jsondiff"
-version = "2.2.1"
-description = "Diff JSON and JSON-like structures in Python"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "jsondiff-2.2.1-py3-none-any.whl", hash = "sha256:b1f0f7e2421881848b1d556d541ac01a91680cfcc14f51a9b62cdf4da0e56722"},
- {file = "jsondiff-2.2.1.tar.gz", hash = "sha256:658d162c8a86ba86de26303cd86a7b37e1b2c1ec98b569a60e2ca6180545f7fe"},
-]
-
-[package.dependencies]
-pyyaml = "*"
-
-[package.extras]
-dev = ["build", "hypothesis", "pytest", "setuptools-scm"]
-
[[package]]
name = "jsonpatch"
version = "1.33"
@@ -2029,6 +2160,24 @@ files = [
{file = "lazy_object_proxy-1.10.0-pp310.pp311.pp312.pp38.pp39-none-any.whl", hash = "sha256:80fa48bd89c8f2f456fc0765c11c23bf5af827febacd2f523ca5bc1893fcc09d"},
]
+[[package]]
+name = "markdown"
+version = "3.7"
+description = "Python implementation of John Gruber's Markdown."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803"},
+ {file = "markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2"},
+]
+
+[package.dependencies]
+importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""}
+
+[package.extras]
+docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"]
+testing = ["coverage", "pyyaml"]
+
[[package]]
name = "markdown-it-py"
version = "3.0.0"
@@ -2134,6 +2283,207 @@ files = [
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
]
+[[package]]
+name = "mergedeep"
+version = "1.3.4"
+description = "A deep merge function for π."
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"},
+ {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"},
+]
+
+[[package]]
+name = "mkdocs"
+version = "1.6.1"
+description = "Project documentation with Markdown."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e"},
+ {file = "mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2"},
+]
+
+[package.dependencies]
+click = ">=7.0"
+colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""}
+ghp-import = ">=1.0"
+importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""}
+jinja2 = ">=2.11.1"
+markdown = ">=3.3.6"
+markupsafe = ">=2.0.1"
+mergedeep = ">=1.3.4"
+mkdocs-get-deps = ">=0.2.0"
+packaging = ">=20.5"
+pathspec = ">=0.11.1"
+pyyaml = ">=5.1"
+pyyaml-env-tag = ">=0.1"
+watchdog = ">=2.0"
+
+[package.extras]
+i18n = ["babel (>=2.9.0)"]
+min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.4)", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"]
+
+[[package]]
+name = "mkdocs-autorefs"
+version = "1.3.0"
+description = "Automatically link across pages in MkDocs."
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "mkdocs_autorefs-1.3.0-py3-none-any.whl", hash = "sha256:d180f9778a04e78b7134e31418f238bba56f56d6a8af97873946ff661befffb3"},
+ {file = "mkdocs_autorefs-1.3.0.tar.gz", hash = "sha256:6867764c099ace9025d6ac24fd07b85a98335fbd30107ef01053697c8f46db61"},
+]
+
+[package.dependencies]
+Markdown = ">=3.3"
+markupsafe = ">=2.0.1"
+mkdocs = ">=1.1"
+
+[[package]]
+name = "mkdocs-gen-files"
+version = "0.5.0"
+description = "MkDocs plugin to programmatically generate documentation pages during the build"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "mkdocs_gen_files-0.5.0-py3-none-any.whl", hash = "sha256:7ac060096f3f40bd19039e7277dd3050be9a453c8ac578645844d4d91d7978ea"},
+ {file = "mkdocs_gen_files-0.5.0.tar.gz", hash = "sha256:4c7cf256b5d67062a788f6b1d035e157fc1a9498c2399be9af5257d4ff4d19bc"},
+]
+
+[package.dependencies]
+mkdocs = ">=1.0.3"
+
+[[package]]
+name = "mkdocs-get-deps"
+version = "0.2.0"
+description = "MkDocs extension that lists all dependencies according to a mkdocs.yml file"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "mkdocs_get_deps-0.2.0-py3-none-any.whl", hash = "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134"},
+ {file = "mkdocs_get_deps-0.2.0.tar.gz", hash = "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c"},
+]
+
+[package.dependencies]
+importlib-metadata = {version = ">=4.3", markers = "python_version < \"3.10\""}
+mergedeep = ">=1.3.4"
+platformdirs = ">=2.2.0"
+pyyaml = ">=5.1"
+
+[[package]]
+name = "mkdocs-literate-nav"
+version = "0.6.1"
+description = "MkDocs plugin to specify the navigation in Markdown instead of YAML"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "mkdocs_literate_nav-0.6.1-py3-none-any.whl", hash = "sha256:e70bdc4a07050d32da79c0b697bd88e9a104cf3294282e9cb20eec94c6b0f401"},
+ {file = "mkdocs_literate_nav-0.6.1.tar.gz", hash = "sha256:78a7ab6d878371728acb0cdc6235c9b0ffc6e83c997b037f4a5c6ff7cef7d759"},
+]
+
+[package.dependencies]
+mkdocs = ">=1.0.3"
+
+[[package]]
+name = "mkdocs-material"
+version = "9.5.49"
+description = "Documentation that simply works"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "mkdocs_material-9.5.49-py3-none-any.whl", hash = "sha256:c3c2d8176b18198435d3a3e119011922f3e11424074645c24019c2dcf08a360e"},
+ {file = "mkdocs_material-9.5.49.tar.gz", hash = "sha256:3671bb282b4f53a1c72e08adbe04d2481a98f85fed392530051f80ff94a9621d"},
+]
+
+[package.dependencies]
+babel = ">=2.10,<3.0"
+colorama = ">=0.4,<1.0"
+jinja2 = ">=3.0,<4.0"
+markdown = ">=3.2,<4.0"
+mkdocs = ">=1.6,<2.0"
+mkdocs-material-extensions = ">=1.3,<2.0"
+paginate = ">=0.5,<1.0"
+pygments = ">=2.16,<3.0"
+pymdown-extensions = ">=10.2,<11.0"
+regex = ">=2022.4"
+requests = ">=2.26,<3.0"
+
+[package.extras]
+git = ["mkdocs-git-committers-plugin-2 (>=1.1,<2.0)", "mkdocs-git-revision-date-localized-plugin (>=1.2.4,<2.0)"]
+imaging = ["cairosvg (>=2.6,<3.0)", "pillow (>=10.2,<11.0)"]
+recommended = ["mkdocs-minify-plugin (>=0.7,<1.0)", "mkdocs-redirects (>=1.2,<2.0)", "mkdocs-rss-plugin (>=1.6,<2.0)"]
+
+[[package]]
+name = "mkdocs-material-extensions"
+version = "1.3.1"
+description = "Extension pack for Python Markdown and MkDocs Material."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31"},
+ {file = "mkdocs_material_extensions-1.3.1.tar.gz", hash = "sha256:10c9511cea88f568257f960358a467d12b970e1f7b2c0e5fb2bb48cab1928443"},
+]
+
+[[package]]
+name = "mkdocs-section-index"
+version = "0.3.9"
+description = "MkDocs plugin to allow clickable sections that lead to an index page"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "mkdocs_section_index-0.3.9-py3-none-any.whl", hash = "sha256:5e5eb288e8d7984d36c11ead5533f376fdf23498f44e903929d72845b24dfe34"},
+ {file = "mkdocs_section_index-0.3.9.tar.gz", hash = "sha256:b66128d19108beceb08b226ee1ba0981840d14baf8a652b6c59e650f3f92e4f8"},
+]
+
+[package.dependencies]
+mkdocs = ">=1.2"
+
+[[package]]
+name = "mkdocstrings"
+version = "0.27.0"
+description = "Automatic documentation from sources, for MkDocs."
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "mkdocstrings-0.27.0-py3-none-any.whl", hash = "sha256:6ceaa7ea830770959b55a16203ac63da24badd71325b96af950e59fd37366332"},
+ {file = "mkdocstrings-0.27.0.tar.gz", hash = "sha256:16adca6d6b0a1f9e0c07ff0b02ced8e16f228a9d65a37c063ec4c14d7b76a657"},
+]
+
+[package.dependencies]
+click = ">=7.0"
+importlib-metadata = {version = ">=4.6", markers = "python_version < \"3.10\""}
+Jinja2 = ">=2.11.1"
+Markdown = ">=3.6"
+MarkupSafe = ">=1.1"
+mkdocs = ">=1.4"
+mkdocs-autorefs = ">=1.2"
+platformdirs = ">=2.2"
+pymdown-extensions = ">=6.3"
+typing-extensions = {version = ">=4.1", markers = "python_version < \"3.10\""}
+
+[package.extras]
+crystal = ["mkdocstrings-crystal (>=0.3.4)"]
+python = ["mkdocstrings-python (>=0.5.2)"]
+python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"]
+
+[[package]]
+name = "mkdocstrings-python"
+version = "1.13.0"
+description = "A Python handler for mkdocstrings."
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "mkdocstrings_python-1.13.0-py3-none-any.whl", hash = "sha256:b88bbb207bab4086434743849f8e796788b373bd32e7bfefbf8560ac45d88f97"},
+ {file = "mkdocstrings_python-1.13.0.tar.gz", hash = "sha256:2dbd5757e8375b9720e81db16f52f1856bf59905428fd7ef88005d1370e2f64c"},
+]
+
+[package.dependencies]
+griffe = ">=0.49"
+mkdocs-autorefs = ">=1.2"
+mkdocstrings = ">=0.26"
+
[[package]]
name = "mmh3"
version = "5.0.1"
@@ -2249,13 +2599,13 @@ type = ["mypy (==1.11.2)"]
[[package]]
name = "moto"
-version = "5.0.24"
-description = ""
+version = "5.0.26"
+description = "A library that allows you to easily mock out tests based on AWS infrastructure"
optional = false
python-versions = ">=3.8"
files = [
- {file = "moto-5.0.24-py3-none-any.whl", hash = "sha256:4d826f1574849f18ddd2fcbf614d97f82c8fddfb9d95fac1078da01a39b57c10"},
- {file = "moto-5.0.24.tar.gz", hash = "sha256:dba6426bd770fbb9d892633fbd35253cbc181eeaa0eba97d6f058720a8fe9b42"},
+ {file = "moto-5.0.26-py3-none-any.whl", hash = "sha256:803831f427ca6c0452ae4fb898d731cfc19906466a33a88cbc1076abcbfcbba7"},
+ {file = "moto-5.0.26.tar.gz", hash = "sha256:6829f58a670a087e7c5b63f8183c6b72d64a1444e420c212250b7326b69a9183"},
]
[package.dependencies]
@@ -2271,10 +2621,9 @@ flask-cors = {version = "*", optional = true, markers = "extra == \"server\""}
graphql-core = {version = "*", optional = true, markers = "extra == \"server\""}
Jinja2 = ">=2.10.1"
joserfc = {version = ">=0.9.0", optional = true, markers = "extra == \"server\""}
-jsondiff = {version = ">=1.1.2", optional = true, markers = "extra == \"server\""}
jsonpath-ng = {version = "*", optional = true, markers = "extra == \"server\""}
openapi-spec-validator = {version = ">=0.5.0", optional = true, markers = "extra == \"server\""}
-py-partiql-parser = {version = "0.5.6", optional = true, markers = "extra == \"server\""}
+py-partiql-parser = {version = "0.6.1", optional = true, markers = "extra == \"server\""}
pyparsing = {version = ">=3.0.7", optional = true, markers = "extra == \"server\""}
python-dateutil = ">=2.1,<3.0.0"
PyYAML = {version = ">=5.1", optional = true, markers = "extra == \"server\""}
@@ -2285,25 +2634,24 @@ werkzeug = ">=0.5,<2.2.0 || >2.2.0,<2.2.1 || >2.2.1"
xmltodict = "*"
[package.extras]
-all = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "jsonschema", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.6)", "pyparsing (>=3.0.7)", "setuptools"]
+all = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsonpath-ng", "jsonschema", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.6.1)", "pyparsing (>=3.0.7)", "setuptools"]
apigateway = ["PyYAML (>=5.1)", "joserfc (>=0.9.0)", "openapi-spec-validator (>=0.5.0)"]
apigatewayv2 = ["PyYAML (>=5.1)", "openapi-spec-validator (>=0.5.0)"]
appsync = ["graphql-core"]
awslambda = ["docker (>=3.0.0)"]
batch = ["docker (>=3.0.0)"]
-cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.6)", "pyparsing (>=3.0.7)", "setuptools"]
+cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.6.1)", "pyparsing (>=3.0.7)", "setuptools"]
cognitoidp = ["joserfc (>=0.9.0)"]
-dynamodb = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.6)"]
-dynamodbstreams = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.6)"]
+dynamodb = ["docker (>=3.0.0)", "py-partiql-parser (==0.6.1)"]
+dynamodbstreams = ["docker (>=3.0.0)", "py-partiql-parser (==0.6.1)"]
events = ["jsonpath-ng"]
glue = ["pyparsing (>=3.0.7)"]
-iotdata = ["jsondiff (>=1.1.2)"]
-proxy = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.6)", "pyparsing (>=3.0.7)", "setuptools"]
+proxy = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "graphql-core", "joserfc (>=0.9.0)", "jsonpath-ng", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.6.1)", "pyparsing (>=3.0.7)", "setuptools"]
quicksight = ["jsonschema"]
-resourcegroupstaggingapi = ["PyYAML (>=5.1)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.6)", "pyparsing (>=3.0.7)"]
-s3 = ["PyYAML (>=5.1)", "py-partiql-parser (==0.5.6)"]
-s3crc32c = ["PyYAML (>=5.1)", "crc32c", "py-partiql-parser (==0.5.6)"]
-server = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.6)", "pyparsing (>=3.0.7)", "setuptools"]
+resourcegroupstaggingapi = ["PyYAML (>=5.1)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.6.1)", "pyparsing (>=3.0.7)"]
+s3 = ["PyYAML (>=5.1)", "py-partiql-parser (==0.6.1)"]
+s3crc32c = ["PyYAML (>=5.1)", "crc32c", "py-partiql-parser (==0.6.1)"]
+server = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "joserfc (>=0.9.0)", "jsonpath-ng", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.6.1)", "pyparsing (>=3.0.7)", "setuptools"]
ssm = ["PyYAML (>=5.1)"]
stepfunctions = ["antlr4-python3-runtime", "jsonpath-ng"]
xray = ["aws-xray-sdk (>=0.93,!=0.96)", "setuptools"]
@@ -2538,17 +2886,32 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""}
[[package]]
name = "mypy-boto3-glue"
-version = "1.35.87"
-description = "Type annotations for boto3 Glue 1.35.87 service generated with mypy-boto3-builder 8.7.0"
+version = "1.35.93"
+description = "Type annotations for boto3 Glue 1.35.93 service generated with mypy-boto3-builder 8.8.0"
optional = true
python-versions = ">=3.8"
files = [
- {file = "mypy_boto3_glue-1.35.87-py3-none-any.whl", hash = "sha256:c4c62daf80e99ad539491b63814b7cf94a5e4f1fca732540a9aaae458af52691"},
- {file = "mypy_boto3_glue-1.35.87.tar.gz", hash = "sha256:d1d5f1bb5c5297045a1a650a6672c46a319e3cf373085d2303c2179dc5b46d7d"},
+ {file = "mypy_boto3_glue-1.35.93-py3-none-any.whl", hash = "sha256:cf46553f68048124bad65345b593ec5ba3806bd9bd15a1d7516d0cb3d79a0652"},
+ {file = "mypy_boto3_glue-1.35.93.tar.gz", hash = "sha256:27759a83ffa5414b2589da83625816a3c7cb97600fec68578bd3012a9ae20ee8"},
]
[package.dependencies]
-typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""}
+typing-extensions = {version = "*", markers = "python_version < \"3.12\""}
+
+[[package]]
+name = "natsort"
+version = "8.4.0"
+description = "Simple yet flexible natural sorting in Python."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"},
+ {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"},
+]
+
+[package.extras]
+fast = ["fastnumbers (>=2.0.0)"]
+icu = ["PyICU (>=1.0.0)"]
[[package]]
name = "networkx"
@@ -2686,6 +3049,21 @@ files = [
{file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
]
+[[package]]
+name = "paginate"
+version = "0.5.7"
+description = "Divides large result sets into pages for easier browsing"
+optional = false
+python-versions = "*"
+files = [
+ {file = "paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591"},
+ {file = "paginate-0.5.7.tar.gz", hash = "sha256:22bd083ab41e1a8b4f3690544afb2c60c25e5c9a63a30fa2f483f6c60c8e5945"},
+]
+
+[package.extras]
+dev = ["pytest", "tox"]
+lint = ["black"]
+
[[package]]
name = "pandas"
version = "2.2.3"
@@ -2783,6 +3161,17 @@ files = [
{file = "pathable-0.4.3.tar.gz", hash = "sha256:5c869d315be50776cc8a993f3af43e0c60dc01506b399643f919034ebf4cdcab"},
]
+[[package]]
+name = "pathspec"
+version = "0.12.1"
+description = "Utility library for gitignore style pattern matching of file paths."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
+ {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
+]
+
[[package]]
name = "platformdirs"
version = "4.3.6"
@@ -3044,6 +3433,7 @@ files = [
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
+ {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"},
{file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
@@ -3068,13 +3458,13 @@ files = [
[[package]]
name = "py-partiql-parser"
-version = "0.5.6"
+version = "0.6.1"
description = "Pure Python PartiQL Parser"
optional = false
python-versions = "*"
files = [
- {file = "py_partiql_parser-0.5.6-py2.py3-none-any.whl", hash = "sha256:622d7b0444becd08c1f4e9e73b31690f4b1c309ab6e5ed45bf607fe71319309f"},
- {file = "py_partiql_parser-0.5.6.tar.gz", hash = "sha256:6339f6bf85573a35686529fc3f491302e71dd091711dfe8df3be89a93767f97b"},
+ {file = "py_partiql_parser-0.6.1-py2.py3-none-any.whl", hash = "sha256:ff6a48067bff23c37e9044021bf1d949c83e195490c17e020715e927fe5b2456"},
+ {file = "py_partiql_parser-0.6.1.tar.gz", hash = "sha256:8583ff2a0e15560ef3bc3df109a7714d17f87d81d33e8c38b7fed4e58a63215d"},
]
[package.extras]
@@ -3183,13 +3573,13 @@ files = [
[[package]]
name = "pydantic"
-version = "2.10.4"
+version = "2.10.5"
description = "Data validation using Python type hints"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pydantic-2.10.4-py3-none-any.whl", hash = "sha256:597e135ea68be3a37552fb524bc7d0d66dcf93d395acd93a00682f1efcb8ee3d"},
- {file = "pydantic-2.10.4.tar.gz", hash = "sha256:82f12e9723da6de4fe2ba888b5971157b3be7ad914267dea8f05f82b28254f06"},
+ {file = "pydantic-2.10.5-py3-none-any.whl", hash = "sha256:4dd4e322dbe55472cb7ca7e73f4b63574eecccf2835ffa2af9021ce113c83c53"},
+ {file = "pydantic-2.10.5.tar.gz", hash = "sha256:278b38dbbaec562011d659ee05f63346951b3a248a6f3642e1bc68894ea2b4ff"},
]
[package.dependencies]
@@ -3362,15 +3752,33 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte
docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"]
tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
+[[package]]
+name = "pymdown-extensions"
+version = "10.13"
+description = "Extension pack for Python Markdown."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "pymdown_extensions-10.13-py3-none-any.whl", hash = "sha256:80bc33d715eec68e683e04298946d47d78c7739e79d808203df278ee8ef89428"},
+ {file = "pymdown_extensions-10.13.tar.gz", hash = "sha256:e0b351494dc0d8d14a1f52b39b1499a00ef1566b4ba23dc74f1eba75c736f5dd"},
+]
+
+[package.dependencies]
+markdown = ">=3.6"
+pyyaml = "*"
+
+[package.extras]
+extra = ["pygments (>=2.12)"]
+
[[package]]
name = "pyparsing"
-version = "3.2.0"
+version = "3.2.1"
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
optional = false
python-versions = ">=3.9"
files = [
- {file = "pyparsing-3.2.0-py3-none-any.whl", hash = "sha256:93d9577b88da0bbea8cc8334ee8b918ed014968fd2ec383e868fb8afb1ccef84"},
- {file = "pyparsing-3.2.0.tar.gz", hash = "sha256:cbf74e27246d595d9a74b186b810f6fbb86726dbf3b9532efb343f6d7294fe9c"},
+ {file = "pyparsing-3.2.1-py3-none-any.whl", hash = "sha256:506ff4f4386c4cec0590ec19e6302d3aedb992fdc02c761e90416f158dacf8e1"},
+ {file = "pyparsing-3.2.1.tar.gz", hash = "sha256:61980854fd66de3a90028d679a954d5f2623e83144b5afe5ee86f43d762e5f0a"},
]
[package.extras]
@@ -3431,23 +3839,22 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no
[[package]]
name = "pytest-checkdocs"
-version = "2.10.1"
+version = "2.13.0"
description = "check the README when running tests"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pytest-checkdocs-2.10.1.tar.gz", hash = "sha256:393868583f2d0314f8c5828fd94f7d28699543f6a0a925356d7e274e2952297e"},
- {file = "pytest_checkdocs-2.10.1-py3-none-any.whl", hash = "sha256:f069d6408633697023298ebf66c9bb1cb915c3ae5f047457b507229a4784e153"},
+ {file = "pytest_checkdocs-2.13.0-py3-none-any.whl", hash = "sha256:5df5bbd7e9753aa51a5f6954a301a4066bd4a04eb7e0c712c5d5d7ede1cbe153"},
+ {file = "pytest_checkdocs-2.13.0.tar.gz", hash = "sha256:b0e67169c543986142e15afbc17c772da87fcdb0922c7b1e4f6c60f8769f11f9"},
]
[package.dependencies]
-build = "*"
docutils = ">=0.15"
-importlib-metadata = {version = ">=4", markers = "python_version < \"3.10\""}
+"jaraco.packaging" = ">=9.5"
[package.extras]
docs = ["furo", "jaraco.packaging (>=9.3)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
-testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff", "types-docutils"]
+testing = ["pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "types-docutils"]
[[package]]
name = "pytest-lazy-fixture"
@@ -3608,6 +4015,20 @@ files = [
{file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
]
+[[package]]
+name = "pyyaml-env-tag"
+version = "0.1"
+description = "A custom YAML tag for referencing environment variables in YAML files. "
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "pyyaml_env_tag-0.1-py3-none-any.whl", hash = "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069"},
+ {file = "pyyaml_env_tag-0.1.tar.gz", hash = "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb"},
+]
+
+[package.dependencies]
+pyyaml = "*"
+
[[package]]
name = "ray"
version = "2.40.0"
@@ -4100,6 +4521,17 @@ files = [
{file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
]
+[[package]]
+name = "snowballstemmer"
+version = "2.2.0"
+description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms."
+optional = false
+python-versions = "*"
+files = [
+ {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"},
+ {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"},
+]
+
[[package]]
name = "sortedcontainers"
version = "2.4.0"
@@ -4111,74 +4543,204 @@ files = [
{file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"},
]
+[[package]]
+name = "sphinx"
+version = "7.4.7"
+description = "Python documentation generator"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "sphinx-7.4.7-py3-none-any.whl", hash = "sha256:c2419e2135d11f1951cd994d6eb18a1835bd8fdd8429f9ca375dc1f3281bd239"},
+ {file = "sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe"},
+]
+
+[package.dependencies]
+alabaster = ">=0.7.14,<0.8.0"
+babel = ">=2.13"
+colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""}
+docutils = ">=0.20,<0.22"
+imagesize = ">=1.3"
+importlib-metadata = {version = ">=6.0", markers = "python_version < \"3.10\""}
+Jinja2 = ">=3.1"
+packaging = ">=23.0"
+Pygments = ">=2.17"
+requests = ">=2.30.0"
+snowballstemmer = ">=2.2"
+sphinxcontrib-applehelp = "*"
+sphinxcontrib-devhelp = "*"
+sphinxcontrib-htmlhelp = ">=2.0.0"
+sphinxcontrib-jsmath = "*"
+sphinxcontrib-qthelp = "*"
+sphinxcontrib-serializinghtml = ">=1.1.9"
+tomli = {version = ">=2", markers = "python_version < \"3.11\""}
+
+[package.extras]
+docs = ["sphinxcontrib-websupport"]
+lint = ["flake8 (>=6.0)", "importlib-metadata (>=6.0)", "mypy (==1.10.1)", "pytest (>=6.0)", "ruff (==0.5.2)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-docutils (==0.21.0.20240711)", "types-requests (>=2.30.0)"]
+test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"]
+
+[[package]]
+name = "sphinxcontrib-applehelp"
+version = "2.0.0"
+description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5"},
+ {file = "sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1"},
+]
+
+[package.extras]
+lint = ["mypy", "ruff (==0.5.5)", "types-docutils"]
+standalone = ["Sphinx (>=5)"]
+test = ["pytest"]
+
+[[package]]
+name = "sphinxcontrib-devhelp"
+version = "2.0.0"
+description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp documents"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2"},
+ {file = "sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad"},
+]
+
+[package.extras]
+lint = ["mypy", "ruff (==0.5.5)", "types-docutils"]
+standalone = ["Sphinx (>=5)"]
+test = ["pytest"]
+
+[[package]]
+name = "sphinxcontrib-htmlhelp"
+version = "2.1.0"
+description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8"},
+ {file = "sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9"},
+]
+
+[package.extras]
+lint = ["mypy", "ruff (==0.5.5)", "types-docutils"]
+standalone = ["Sphinx (>=5)"]
+test = ["html5lib", "pytest"]
+
+[[package]]
+name = "sphinxcontrib-jsmath"
+version = "1.0.1"
+description = "A sphinx extension which renders display math in HTML via JavaScript"
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"},
+ {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"},
+]
+
+[package.extras]
+test = ["flake8", "mypy", "pytest"]
+
+[[package]]
+name = "sphinxcontrib-qthelp"
+version = "2.0.0"
+description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp documents"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb"},
+ {file = "sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab"},
+]
+
+[package.extras]
+lint = ["mypy", "ruff (==0.5.5)", "types-docutils"]
+standalone = ["Sphinx (>=5)"]
+test = ["defusedxml (>=0.7.1)", "pytest"]
+
+[[package]]
+name = "sphinxcontrib-serializinghtml"
+version = "2.0.0"
+description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331"},
+ {file = "sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d"},
+]
+
+[package.extras]
+lint = ["mypy", "ruff (==0.5.5)", "types-docutils"]
+standalone = ["Sphinx (>=5)"]
+test = ["pytest"]
+
[[package]]
name = "sqlalchemy"
-version = "2.0.36"
+version = "2.0.37"
description = "Database Abstraction Library"
optional = true
python-versions = ">=3.7"
files = [
- {file = "SQLAlchemy-2.0.36-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:59b8f3adb3971929a3e660337f5dacc5942c2cdb760afcabb2614ffbda9f9f72"},
- {file = "SQLAlchemy-2.0.36-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37350015056a553e442ff672c2d20e6f4b6d0b2495691fa239d8aa18bb3bc908"},
- {file = "SQLAlchemy-2.0.36-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8318f4776c85abc3f40ab185e388bee7a6ea99e7fa3a30686580b209eaa35c08"},
- {file = "SQLAlchemy-2.0.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c245b1fbade9c35e5bd3b64270ab49ce990369018289ecfde3f9c318411aaa07"},
- {file = "SQLAlchemy-2.0.36-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:69f93723edbca7342624d09f6704e7126b152eaed3cdbb634cb657a54332a3c5"},
- {file = "SQLAlchemy-2.0.36-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f9511d8dd4a6e9271d07d150fb2f81874a3c8c95e11ff9af3a2dfc35fe42ee44"},
- {file = "SQLAlchemy-2.0.36-cp310-cp310-win32.whl", hash = "sha256:c3f3631693003d8e585d4200730616b78fafd5a01ef8b698f6967da5c605b3fa"},
- {file = "SQLAlchemy-2.0.36-cp310-cp310-win_amd64.whl", hash = "sha256:a86bfab2ef46d63300c0f06936bd6e6c0105faa11d509083ba8f2f9d237fb5b5"},
- {file = "SQLAlchemy-2.0.36-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fd3a55deef00f689ce931d4d1b23fa9f04c880a48ee97af488fd215cf24e2a6c"},
- {file = "SQLAlchemy-2.0.36-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4f5e9cd989b45b73bd359f693b935364f7e1f79486e29015813c338450aa5a71"},
- {file = "SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0ddd9db6e59c44875211bc4c7953a9f6638b937b0a88ae6d09eb46cced54eff"},
- {file = "SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2519f3a5d0517fc159afab1015e54bb81b4406c278749779be57a569d8d1bb0d"},
- {file = "SQLAlchemy-2.0.36-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59b1ee96617135f6e1d6f275bbe988f419c5178016f3d41d3c0abb0c819f75bb"},
- {file = "SQLAlchemy-2.0.36-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:39769a115f730d683b0eb7b694db9789267bcd027326cccc3125e862eb03bfd8"},
- {file = "SQLAlchemy-2.0.36-cp311-cp311-win32.whl", hash = "sha256:66bffbad8d6271bb1cc2f9a4ea4f86f80fe5e2e3e501a5ae2a3dc6a76e604e6f"},
- {file = "SQLAlchemy-2.0.36-cp311-cp311-win_amd64.whl", hash = "sha256:23623166bfefe1487d81b698c423f8678e80df8b54614c2bf4b4cfcd7c711959"},
- {file = "SQLAlchemy-2.0.36-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7b64e6ec3f02c35647be6b4851008b26cff592a95ecb13b6788a54ef80bbdd4"},
- {file = "SQLAlchemy-2.0.36-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:46331b00096a6db1fdc052d55b101dbbfc99155a548e20a0e4a8e5e4d1362855"},
- {file = "SQLAlchemy-2.0.36-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdf3386a801ea5aba17c6410dd1dc8d39cf454ca2565541b5ac42a84e1e28f53"},
- {file = "SQLAlchemy-2.0.36-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac9dfa18ff2a67b09b372d5db8743c27966abf0e5344c555d86cc7199f7ad83a"},
- {file = "SQLAlchemy-2.0.36-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:90812a8933df713fdf748b355527e3af257a11e415b613dd794512461eb8a686"},
- {file = "SQLAlchemy-2.0.36-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1bc330d9d29c7f06f003ab10e1eaced295e87940405afe1b110f2eb93a233588"},
- {file = "SQLAlchemy-2.0.36-cp312-cp312-win32.whl", hash = "sha256:79d2e78abc26d871875b419e1fd3c0bca31a1cb0043277d0d850014599626c2e"},
- {file = "SQLAlchemy-2.0.36-cp312-cp312-win_amd64.whl", hash = "sha256:b544ad1935a8541d177cb402948b94e871067656b3a0b9e91dbec136b06a2ff5"},
- {file = "SQLAlchemy-2.0.36-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b5cc79df7f4bc3d11e4b542596c03826063092611e481fcf1c9dfee3c94355ef"},
- {file = "SQLAlchemy-2.0.36-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3c01117dd36800f2ecaa238c65365b7b16497adc1522bf84906e5710ee9ba0e8"},
- {file = "SQLAlchemy-2.0.36-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9bc633f4ee4b4c46e7adcb3a9b5ec083bf1d9a97c1d3854b92749d935de40b9b"},
- {file = "SQLAlchemy-2.0.36-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e46ed38affdfc95d2c958de328d037d87801cfcbea6d421000859e9789e61c2"},
- {file = "SQLAlchemy-2.0.36-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b2985c0b06e989c043f1dc09d4fe89e1616aadd35392aea2844f0458a989eacf"},
- {file = "SQLAlchemy-2.0.36-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a121d62ebe7d26fec9155f83f8be5189ef1405f5973ea4874a26fab9f1e262c"},
- {file = "SQLAlchemy-2.0.36-cp313-cp313-win32.whl", hash = "sha256:0572f4bd6f94752167adfd7c1bed84f4b240ee6203a95e05d1e208d488d0d436"},
- {file = "SQLAlchemy-2.0.36-cp313-cp313-win_amd64.whl", hash = "sha256:8c78ac40bde930c60e0f78b3cd184c580f89456dd87fc08f9e3ee3ce8765ce88"},
- {file = "SQLAlchemy-2.0.36-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:be9812b766cad94a25bc63bec11f88c4ad3629a0cec1cd5d4ba48dc23860486b"},
- {file = "SQLAlchemy-2.0.36-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50aae840ebbd6cdd41af1c14590e5741665e5272d2fee999306673a1bb1fdb4d"},
- {file = "SQLAlchemy-2.0.36-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4557e1f11c5f653ebfdd924f3f9d5ebfc718283b0b9beebaa5dd6b77ec290971"},
- {file = "SQLAlchemy-2.0.36-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:07b441f7d03b9a66299ce7ccf3ef2900abc81c0db434f42a5694a37bd73870f2"},
- {file = "SQLAlchemy-2.0.36-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:28120ef39c92c2dd60f2721af9328479516844c6b550b077ca450c7d7dc68575"},
- {file = "SQLAlchemy-2.0.36-cp37-cp37m-win32.whl", hash = "sha256:b81ee3d84803fd42d0b154cb6892ae57ea6b7c55d8359a02379965706c7efe6c"},
- {file = "SQLAlchemy-2.0.36-cp37-cp37m-win_amd64.whl", hash = "sha256:f942a799516184c855e1a32fbc7b29d7e571b52612647866d4ec1c3242578fcb"},
- {file = "SQLAlchemy-2.0.36-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3d6718667da04294d7df1670d70eeddd414f313738d20a6f1d1f379e3139a545"},
- {file = "SQLAlchemy-2.0.36-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:72c28b84b174ce8af8504ca28ae9347d317f9dba3999e5981a3cd441f3712e24"},
- {file = "SQLAlchemy-2.0.36-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b11d0cfdd2b095e7b0686cf5fabeb9c67fae5b06d265d8180715b8cfa86522e3"},
- {file = "SQLAlchemy-2.0.36-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e32092c47011d113dc01ab3e1d3ce9f006a47223b18422c5c0d150af13a00687"},
- {file = "SQLAlchemy-2.0.36-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6a440293d802d3011028e14e4226da1434b373cbaf4a4bbb63f845761a708346"},
- {file = "SQLAlchemy-2.0.36-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c54a1e53a0c308a8e8a7dffb59097bff7facda27c70c286f005327f21b2bd6b1"},
- {file = "SQLAlchemy-2.0.36-cp38-cp38-win32.whl", hash = "sha256:1e0d612a17581b6616ff03c8e3d5eff7452f34655c901f75d62bd86449d9750e"},
- {file = "SQLAlchemy-2.0.36-cp38-cp38-win_amd64.whl", hash = "sha256:8958b10490125124463095bbdadda5aa22ec799f91958e410438ad6c97a7b793"},
- {file = "SQLAlchemy-2.0.36-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dc022184d3e5cacc9579e41805a681187650e170eb2fd70e28b86192a479dcaa"},
- {file = "SQLAlchemy-2.0.36-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b817d41d692bf286abc181f8af476c4fbef3fd05e798777492618378448ee689"},
- {file = "SQLAlchemy-2.0.36-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4e46a888b54be23d03a89be510f24a7652fe6ff660787b96cd0e57a4ebcb46d"},
- {file = "SQLAlchemy-2.0.36-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4ae3005ed83f5967f961fd091f2f8c5329161f69ce8480aa8168b2d7fe37f06"},
- {file = "SQLAlchemy-2.0.36-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:03e08af7a5f9386a43919eda9de33ffda16b44eb11f3b313e6822243770e9763"},
- {file = "SQLAlchemy-2.0.36-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3dbb986bad3ed5ceaf090200eba750b5245150bd97d3e67343a3cfed06feecf7"},
- {file = "SQLAlchemy-2.0.36-cp39-cp39-win32.whl", hash = "sha256:9fe53b404f24789b5ea9003fc25b9a3988feddebd7e7b369c8fac27ad6f52f28"},
- {file = "SQLAlchemy-2.0.36-cp39-cp39-win_amd64.whl", hash = "sha256:af148a33ff0349f53512a049c6406923e4e02bf2f26c5fb285f143faf4f0e46a"},
- {file = "SQLAlchemy-2.0.36-py3-none-any.whl", hash = "sha256:fddbe92b4760c6f5d48162aef14824add991aeda8ddadb3c31d56eb15ca69f8e"},
- {file = "sqlalchemy-2.0.36.tar.gz", hash = "sha256:7f2767680b6d2398aea7082e45a774b2b0767b5c8d8ffb9c8b683088ea9b29c5"},
+ {file = "SQLAlchemy-2.0.37-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da36c3b0e891808a7542c5c89f224520b9a16c7f5e4d6a1156955605e54aef0e"},
+ {file = "SQLAlchemy-2.0.37-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e7402ff96e2b073a98ef6d6142796426d705addd27b9d26c3b32dbaa06d7d069"},
+ {file = "SQLAlchemy-2.0.37-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6f5d254a22394847245f411a2956976401e84da4288aa70cbcd5190744062c1"},
+ {file = "SQLAlchemy-2.0.37-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41296bbcaa55ef5fdd32389a35c710133b097f7b2609d8218c0eabded43a1d84"},
+ {file = "SQLAlchemy-2.0.37-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bedee60385c1c0411378cbd4dc486362f5ee88deceea50002772912d798bb00f"},
+ {file = "SQLAlchemy-2.0.37-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6c67415258f9f3c69867ec02fea1bf6508153709ecbd731a982442a590f2b7e4"},
+ {file = "SQLAlchemy-2.0.37-cp310-cp310-win32.whl", hash = "sha256:650dcb70739957a492ad8acff65d099a9586b9b8920e3507ca61ec3ce650bb72"},
+ {file = "SQLAlchemy-2.0.37-cp310-cp310-win_amd64.whl", hash = "sha256:93d1543cd8359040c02b6614421c8e10cd7a788c40047dbc507ed46c29ae5636"},
+ {file = "SQLAlchemy-2.0.37-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:78361be6dc9073ed17ab380985d1e45e48a642313ab68ab6afa2457354ff692c"},
+ {file = "SQLAlchemy-2.0.37-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b661b49d0cb0ab311a189b31e25576b7ac3e20783beb1e1817d72d9d02508bf5"},
+ {file = "SQLAlchemy-2.0.37-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d57bafbab289e147d064ffbd5cca2d7b1394b63417c0636cea1f2e93d16eb9e8"},
+ {file = "SQLAlchemy-2.0.37-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fa2c0913f02341d25fb858e4fb2031e6b0813494cca1ba07d417674128ce11b"},
+ {file = "SQLAlchemy-2.0.37-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9df21b8d9e5c136ea6cde1c50d2b1c29a2b5ff2b1d610165c23ff250e0704087"},
+ {file = "SQLAlchemy-2.0.37-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db18ff6b8c0f1917f8b20f8eca35c28bbccb9f83afa94743e03d40203ed83de9"},
+ {file = "SQLAlchemy-2.0.37-cp311-cp311-win32.whl", hash = "sha256:46954173612617a99a64aee103bcd3f078901b9a8dcfc6ae80cbf34ba23df989"},
+ {file = "SQLAlchemy-2.0.37-cp311-cp311-win_amd64.whl", hash = "sha256:7b7e772dc4bc507fdec4ee20182f15bd60d2a84f1e087a8accf5b5b7a0dcf2ba"},
+ {file = "SQLAlchemy-2.0.37-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2952748ecd67ed3b56773c185e85fc084f6bdcdec10e5032a7c25a6bc7d682ef"},
+ {file = "SQLAlchemy-2.0.37-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3151822aa1db0eb5afd65ccfafebe0ef5cda3a7701a279c8d0bf17781a793bb4"},
+ {file = "SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eaa8039b6d20137a4e02603aba37d12cd2dde7887500b8855356682fc33933f4"},
+ {file = "SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1cdba1f73b64530c47b27118b7053b8447e6d6f3c8104e3ac59f3d40c33aa9fd"},
+ {file = "SQLAlchemy-2.0.37-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1b2690456528a87234a75d1a1644cdb330a6926f455403c8e4f6cad6921f9098"},
+ {file = "SQLAlchemy-2.0.37-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf5ae8a9dcf657fd72144a7fd01f243236ea39e7344e579a121c4205aedf07bb"},
+ {file = "SQLAlchemy-2.0.37-cp312-cp312-win32.whl", hash = "sha256:ea308cec940905ba008291d93619d92edaf83232ec85fbd514dcb329f3192761"},
+ {file = "SQLAlchemy-2.0.37-cp312-cp312-win_amd64.whl", hash = "sha256:635d8a21577341dfe4f7fa59ec394b346da12420b86624a69e466d446de16aff"},
+ {file = "SQLAlchemy-2.0.37-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8c4096727193762e72ce9437e2a86a110cf081241919ce3fab8e89c02f6b6658"},
+ {file = "SQLAlchemy-2.0.37-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e4fb5ac86d8fe8151966814f6720996430462e633d225497566b3996966b9bdb"},
+ {file = "SQLAlchemy-2.0.37-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e56a139bfe136a22c438478a86f8204c1eb5eed36f4e15c4224e4b9db01cb3e4"},
+ {file = "SQLAlchemy-2.0.37-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f95fc8e3f34b5f6b3effb49d10ac97c569ec8e32f985612d9b25dd12d0d2e94"},
+ {file = "SQLAlchemy-2.0.37-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c505edd429abdfe3643fa3b2e83efb3445a34a9dc49d5f692dd087be966020e0"},
+ {file = "SQLAlchemy-2.0.37-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:12b0f1ec623cccf058cf21cb544f0e74656618165b083d78145cafde156ea7b6"},
+ {file = "SQLAlchemy-2.0.37-cp313-cp313-win32.whl", hash = "sha256:293f9ade06b2e68dd03cfb14d49202fac47b7bb94bffcff174568c951fbc7af2"},
+ {file = "SQLAlchemy-2.0.37-cp313-cp313-win_amd64.whl", hash = "sha256:d70f53a0646cc418ca4853da57cf3ddddbccb8c98406791f24426f2dd77fd0e2"},
+ {file = "SQLAlchemy-2.0.37-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:44f569d0b1eb82301b92b72085583277316e7367e038d97c3a1a899d9a05e342"},
+ {file = "SQLAlchemy-2.0.37-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2eae3423e538c10d93ae3e87788c6a84658c3ed6db62e6a61bb9495b0ad16bb"},
+ {file = "SQLAlchemy-2.0.37-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfff7be361048244c3aa0f60b5e63221c5e0f0e509f4e47b8910e22b57d10ae7"},
+ {file = "SQLAlchemy-2.0.37-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:5bc3339db84c5fb9130ac0e2f20347ee77b5dd2596ba327ce0d399752f4fce39"},
+ {file = "SQLAlchemy-2.0.37-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:84b9f23b0fa98a6a4b99d73989350a94e4a4ec476b9a7dfe9b79ba5939f5e80b"},
+ {file = "SQLAlchemy-2.0.37-cp37-cp37m-win32.whl", hash = "sha256:51bc9cfef83e0ac84f86bf2b10eaccb27c5a3e66a1212bef676f5bee6ef33ebb"},
+ {file = "SQLAlchemy-2.0.37-cp37-cp37m-win_amd64.whl", hash = "sha256:8e47f1af09444f87c67b4f1bb6231e12ba6d4d9f03050d7fc88df6d075231a49"},
+ {file = "SQLAlchemy-2.0.37-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6b788f14c5bb91db7f468dcf76f8b64423660a05e57fe277d3f4fad7b9dcb7ce"},
+ {file = "SQLAlchemy-2.0.37-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:521ef85c04c33009166777c77e76c8a676e2d8528dc83a57836b63ca9c69dcd1"},
+ {file = "SQLAlchemy-2.0.37-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75311559f5c9881a9808eadbeb20ed8d8ba3f7225bef3afed2000c2a9f4d49b9"},
+ {file = "SQLAlchemy-2.0.37-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cce918ada64c956b62ca2c2af59b125767097ec1dca89650a6221e887521bfd7"},
+ {file = "SQLAlchemy-2.0.37-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9d087663b7e1feabea8c578d6887d59bb00388158e8bff3a76be11aa3f748ca2"},
+ {file = "SQLAlchemy-2.0.37-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cf95a60b36997dad99692314c4713f141b61c5b0b4cc5c3426faad570b31ca01"},
+ {file = "SQLAlchemy-2.0.37-cp38-cp38-win32.whl", hash = "sha256:d75ead7dd4d255068ea0f21492ee67937bd7c90964c8f3c2bea83c7b7f81b95f"},
+ {file = "SQLAlchemy-2.0.37-cp38-cp38-win_amd64.whl", hash = "sha256:74bbd1d0a9bacf34266a7907d43260c8d65d31d691bb2356f41b17c2dca5b1d0"},
+ {file = "SQLAlchemy-2.0.37-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:648ec5acf95ad59255452ef759054f2176849662af4521db6cb245263ae4aa33"},
+ {file = "SQLAlchemy-2.0.37-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:35bd2df269de082065d4b23ae08502a47255832cc3f17619a5cea92ce478b02b"},
+ {file = "SQLAlchemy-2.0.37-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f581d365af9373a738c49e0c51e8b18e08d8a6b1b15cc556773bcd8a192fa8b"},
+ {file = "SQLAlchemy-2.0.37-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82df02816c14f8dc9f4d74aea4cb84a92f4b0620235daa76dde002409a3fbb5a"},
+ {file = "SQLAlchemy-2.0.37-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:94b564e38b344d3e67d2e224f0aec6ba09a77e4582ced41e7bfd0f757d926ec9"},
+ {file = "SQLAlchemy-2.0.37-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:955a2a765aa1bd81aafa69ffda179d4fe3e2a3ad462a736ae5b6f387f78bfeb8"},
+ {file = "SQLAlchemy-2.0.37-cp39-cp39-win32.whl", hash = "sha256:03f0528c53ca0b67094c4764523c1451ea15959bbf0a8a8a3096900014db0278"},
+ {file = "SQLAlchemy-2.0.37-cp39-cp39-win_amd64.whl", hash = "sha256:4b12885dc85a2ab2b7d00995bac6d967bffa8594123b02ed21e8eb2205a7584b"},
+ {file = "SQLAlchemy-2.0.37-py3-none-any.whl", hash = "sha256:a8998bf9f8658bd3839cbc44ddbe982955641863da0c1efe5b00c1ab4f5c16b1"},
+ {file = "sqlalchemy-2.0.37.tar.gz", hash = "sha256:12b28d99a9c14eaf4055810df1001557176716de0167b91026e648e65229bffb"},
]
[package.dependencies]
-greenlet = {version = "!=0.4.17", markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"}
+greenlet = {version = "!=0.4.17", markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"}
typing-extensions = ">=4.6.0"
[package.extras]
@@ -4418,6 +4980,48 @@ platformdirs = ">=3.9.1,<5"
docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
+[[package]]
+name = "watchdog"
+version = "6.0.0"
+description = "Filesystem events monitoring"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26"},
+ {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112"},
+ {file = "watchdog-6.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c897ac1b55c5a1461e16dae288d22bb2e412ba9807df8397a635d88f671d36c3"},
+ {file = "watchdog-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6eb11feb5a0d452ee41f824e271ca311a09e250441c262ca2fd7ebcf2461a06c"},
+ {file = "watchdog-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ef810fbf7b781a5a593894e4f439773830bdecb885e6880d957d5b9382a960d2"},
+ {file = "watchdog-6.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afd0fe1b2270917c5e23c2a65ce50c2a4abb63daafb0d419fde368e272a76b7c"},
+ {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948"},
+ {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860"},
+ {file = "watchdog-6.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0"},
+ {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c"},
+ {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134"},
+ {file = "watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b"},
+ {file = "watchdog-6.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e6f0e77c9417e7cd62af82529b10563db3423625c5fce018430b249bf977f9e8"},
+ {file = "watchdog-6.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:90c8e78f3b94014f7aaae121e6b909674df5b46ec24d6bebc45c44c56729af2a"},
+ {file = "watchdog-6.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e7631a77ffb1f7d2eefa4445ebbee491c720a5661ddf6df3498ebecae5ed375c"},
+ {file = "watchdog-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c7ac31a19f4545dd92fc25d200694098f42c9a8e391bc00bdd362c5736dbf881"},
+ {file = "watchdog-6.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9513f27a1a582d9808cf21a07dae516f0fab1cf2d7683a742c498b93eedabb11"},
+ {file = "watchdog-6.0.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7a0e56874cfbc4b9b05c60c8a1926fedf56324bb08cfbc188969777940aef3aa"},
+ {file = "watchdog-6.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e6439e374fc012255b4ec786ae3c4bc838cd7309a540e5fe0952d03687d8804e"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2"},
+ {file = "watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a"},
+ {file = "watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680"},
+ {file = "watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f"},
+ {file = "watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282"},
+]
+
+[package.extras]
+watchmedo = ["PyYAML (>=3.10)"]
+
[[package]]
name = "werkzeug"
version = "3.1.3"
@@ -4769,4 +5373,4 @@ zstandard = ["zstandard"]
[metadata]
lock-version = "2.0"
python-versions = "^3.9, !=3.9.7"
-content-hash = "5d0dd91ca2837bd93fe8a2d17b504f992d0c3095a278de43982e89a65c67ee66"
+content-hash = "306213628bcc69346e14742843c8e6bccf19c2615886943c2e1482a954a388ec"
\ No newline at end of file
diff --git a/pyiceberg/__init__.py b/pyiceberg/__init__.py
index 42c6e12f1b..e97de9276f 100644
--- a/pyiceberg/__init__.py
+++ b/pyiceberg/__init__.py
@@ -15,4 +15,4 @@
# specific language governing permissions and limitations
# under the License.
-__version__ = "0.8.0"
+__version__ = "0.9.0"
diff --git a/pyiceberg/avro/reader.py b/pyiceberg/avro/reader.py
index 988bd42ba4..a5578680d6 100644
--- a/pyiceberg/avro/reader.py
+++ b/pyiceberg/avro/reader.py
@@ -51,7 +51,7 @@
def _skip_map_array(decoder: BinaryDecoder, skip_entry: Callable[[], None]) -> None:
"""Skips over an array or map.
- Both the array and map are encoded similar, and we can re-use
+    Both the array and map are encoded similarly, and we can reuse
the logic of skipping in an efficient way.
From the Avro spec:
diff --git a/pyiceberg/cli/output.py b/pyiceberg/cli/output.py
index a4183c32bd..0eb85841bf 100644
--- a/pyiceberg/cli/output.py
+++ b/pyiceberg/cli/output.py
@@ -242,8 +242,10 @@ def version(self, version: str) -> None:
self._out({"version": version})
def describe_refs(self, refs: List[Tuple[str, SnapshotRefType, Dict[str, str]]]) -> None:
- self._out([
- {"name": name, "type": type, detail_key: detail_val}
- for name, type, detail in refs
- for detail_key, detail_val in detail.items()
- ])
+ self._out(
+ [
+ {"name": name, "type": type, detail_key: detail_val}
+ for name, type, detail in refs
+ for detail_key, detail_val in detail.items()
+ ]
+ )
diff --git a/pyiceberg/expressions/visitors.py b/pyiceberg/expressions/visitors.py
index 26698921b5..768878b068 100644
--- a/pyiceberg/expressions/visitors.py
+++ b/pyiceberg/expressions/visitors.py
@@ -1228,7 +1228,7 @@ def visit_less_than(self, term: BoundTerm[L], literal: Literal[L]) -> bool:
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
return ROWS_MIGHT_MATCH
- if lower_bound >= literal.value:
+ if lower_bound >= literal.value: # type: ignore[operator]
return ROWS_CANNOT_MATCH
return ROWS_MIGHT_MATCH
@@ -1249,7 +1249,7 @@ def visit_less_than_or_equal(self, term: BoundTerm[L], literal: Literal[L]) -> b
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
return ROWS_MIGHT_MATCH
- if lower_bound > literal.value:
+ if lower_bound > literal.value: # type: ignore[operator]
return ROWS_CANNOT_MATCH
return ROWS_MIGHT_MATCH
@@ -1266,7 +1266,7 @@ def visit_greater_than(self, term: BoundTerm[L], literal: Literal[L]) -> bool:
if upper_bound_bytes := self.upper_bounds.get(field_id):
upper_bound = from_bytes(field.field_type, upper_bound_bytes)
- if upper_bound <= literal.value:
+ if upper_bound <= literal.value: # type: ignore[operator]
if self._is_nan(upper_bound):
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
return ROWS_MIGHT_MATCH
@@ -1287,7 +1287,7 @@ def visit_greater_than_or_equal(self, term: BoundTerm[L], literal: Literal[L]) -
if upper_bound_bytes := self.upper_bounds.get(field_id):
upper_bound = from_bytes(field.field_type, upper_bound_bytes)
- if upper_bound < literal.value:
+ if upper_bound < literal.value: # type: ignore[operator]
if self._is_nan(upper_bound):
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
return ROWS_MIGHT_MATCH
@@ -1312,7 +1312,7 @@ def visit_equal(self, term: BoundTerm[L], literal: Literal[L]) -> bool:
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
return ROWS_MIGHT_MATCH
- if lower_bound > literal.value:
+ if lower_bound > literal.value: # type: ignore[operator]
return ROWS_CANNOT_MATCH
if upper_bound_bytes := self.upper_bounds.get(field_id):
@@ -1321,7 +1321,7 @@ def visit_equal(self, term: BoundTerm[L], literal: Literal[L]) -> bool:
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
return ROWS_MIGHT_MATCH
- if upper_bound < literal.value:
+ if upper_bound < literal.value: # type: ignore[operator]
return ROWS_CANNOT_MATCH
return ROWS_MIGHT_MATCH
@@ -1349,7 +1349,7 @@ def visit_in(self, term: BoundTerm[L], literals: Set[L]) -> bool:
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
return ROWS_MIGHT_MATCH
- literals = {lit for lit in literals if lower_bound <= lit}
+ literals = {lit for lit in literals if lower_bound <= lit} # type: ignore[operator]
if len(literals) == 0:
return ROWS_CANNOT_MATCH
@@ -1359,7 +1359,7 @@ def visit_in(self, term: BoundTerm[L], literals: Set[L]) -> bool:
if self._is_nan(upper_bound):
return ROWS_MIGHT_MATCH
- literals = {lit for lit in literals if upper_bound >= lit}
+ literals = {lit for lit in literals if upper_bound >= lit} # type: ignore[operator]
if len(literals) == 0:
return ROWS_CANNOT_MATCH
diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py
index 40186069d4..f322221e4b 100644
--- a/pyiceberg/io/__init__.py
+++ b/pyiceberg/io/__init__.py
@@ -48,14 +48,6 @@
logger = logging.getLogger(__name__)
-ADLFS_CONNECTION_STRING = "adlfs.connection-string"
-ADLFS_ACCOUNT_NAME = "adlfs.account-name"
-ADLFS_ACCOUNT_KEY = "adlfs.account-key"
-ADLFS_SAS_TOKEN = "adlfs.sas-token"
-ADLFS_TENANT_ID = "adlfs.tenant-id"
-ADLFS_CLIENT_ID = "adlfs.client-id"
-ADLFS_ClIENT_SECRET = "adlfs.client-secret"
-ADLFS_PREFIX = "adlfs"
AWS_REGION = "client.region"
AWS_ACCESS_KEY_ID = "client.access-key-id"
AWS_SECRET_ACCESS_KEY = "client.secret-access-key"
@@ -94,7 +86,6 @@
GCS_CACHE_TIMEOUT = "gcs.cache-timeout"
GCS_REQUESTER_PAYS = "gcs.requester-pays"
GCS_SESSION_KWARGS = "gcs.session-kwargs"
-GCS_ENDPOINT = "gcs.endpoint"
GCS_SERVICE_HOST = "gcs.service.host"
GCS_DEFAULT_LOCATION = "gcs.default-bucket-location"
GCS_VERSION_AWARE = "gcs.version-aware"
diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py
index 23796d4e6a..62e9b92342 100644
--- a/pyiceberg/io/fsspec.py
+++ b/pyiceberg/io/fsspec.py
@@ -40,13 +40,6 @@
from pyiceberg.catalog import TOKEN
from pyiceberg.exceptions import SignError
from pyiceberg.io import (
- ADLFS_ACCOUNT_KEY,
- ADLFS_ACCOUNT_NAME,
- ADLFS_CLIENT_ID,
- ADLFS_CONNECTION_STRING,
- ADLFS_PREFIX,
- ADLFS_SAS_TOKEN,
- ADLFS_TENANT_ID,
ADLS_ACCOUNT_KEY,
ADLS_ACCOUNT_NAME,
ADLS_CLIENT_ID,
@@ -61,7 +54,6 @@
GCS_CACHE_TIMEOUT,
GCS_CONSISTENCY,
GCS_DEFAULT_LOCATION,
- GCS_ENDPOINT,
GCS_PROJECT_ID,
GCS_REQUESTER_PAYS,
GCS_SERVICE_HOST,
@@ -78,7 +70,6 @@
S3_SIGNER_ENDPOINT,
S3_SIGNER_ENDPOINT_DEFAULT,
S3_SIGNER_URI,
- ADLFS_ClIENT_SECRET,
ADLS_ClIENT_SECRET,
FileIO,
InputFile,
@@ -87,7 +78,6 @@
OutputStream,
)
from pyiceberg.typedef import Properties
-from pyiceberg.utils.deprecated import deprecation_message
from pyiceberg.utils.properties import get_first_property_value, property_as_bool
logger = logging.getLogger(__name__)
@@ -172,12 +162,6 @@ def _gs(properties: Properties) -> AbstractFileSystem:
# https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem
from gcsfs import GCSFileSystem
- if properties.get(GCS_ENDPOINT):
- deprecation_message(
- deprecated_in="0.8.0",
- removed_in="0.9.0",
- help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead",
- )
return GCSFileSystem(
project=properties.get(GCS_PROJECT_ID),
access=properties.get(GCS_ACCESS, "full_control"),
@@ -186,7 +170,7 @@ def _gs(properties: Properties) -> AbstractFileSystem:
cache_timeout=properties.get(GCS_CACHE_TIMEOUT),
requester_pays=property_as_bool(properties, GCS_REQUESTER_PAYS, False),
session_kwargs=json.loads(properties.get(GCS_SESSION_KWARGS, "{}")),
- endpoint_url=get_first_property_value(properties, GCS_SERVICE_HOST, GCS_ENDPOINT),
+ endpoint_url=properties.get(GCS_SERVICE_HOST),
default_location=properties.get(GCS_DEFAULT_LOCATION),
version_aware=property_as_bool(properties, GCS_VERSION_AWARE, False),
)
@@ -195,50 +179,14 @@ def _gs(properties: Properties) -> AbstractFileSystem:
def _adls(properties: Properties) -> AbstractFileSystem:
from adlfs import AzureBlobFileSystem
- for property_name in properties:
- if property_name.startswith(ADLFS_PREFIX):
- deprecation_message(
- deprecated_in="0.8.0",
- removed_in="0.9.0",
- help_message=f"The property {property_name} is deprecated. Please use properties that start with adls.",
- )
-
return AzureBlobFileSystem(
- connection_string=get_first_property_value(
- properties,
- ADLS_CONNECTION_STRING,
- ADLFS_CONNECTION_STRING,
- ),
- account_name=get_first_property_value(
- properties,
- ADLS_ACCOUNT_NAME,
- ADLFS_ACCOUNT_NAME,
- ),
- account_key=get_first_property_value(
- properties,
- ADLS_ACCOUNT_KEY,
- ADLFS_ACCOUNT_KEY,
- ),
- sas_token=get_first_property_value(
- properties,
- ADLS_SAS_TOKEN,
- ADLFS_SAS_TOKEN,
- ),
- tenant_id=get_first_property_value(
- properties,
- ADLS_TENANT_ID,
- ADLFS_TENANT_ID,
- ),
- client_id=get_first_property_value(
- properties,
- ADLS_CLIENT_ID,
- ADLFS_CLIENT_ID,
- ),
- client_secret=get_first_property_value(
- properties,
- ADLS_ClIENT_SECRET,
- ADLFS_ClIENT_SECRET,
- ),
+ connection_string=properties.get(ADLS_CONNECTION_STRING),
+ account_name=properties.get(ADLS_ACCOUNT_NAME),
+ account_key=properties.get(ADLS_ACCOUNT_KEY),
+ sas_token=properties.get(ADLS_SAS_TOKEN),
+ tenant_id=properties.get(ADLS_TENANT_ID),
+ client_id=properties.get(ADLS_CLIENT_ID),
+ client_secret=properties.get(ADLS_ClIENT_SECRET),
)
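
For reference, a minimal configuration sketch (not part of the patch): after this cleanup only the `adls.`-prefixed properties and `gcs.service.host` are read, with no fallback to the removed `adlfs.*` / `gcs.endpoint` spellings. The catalog name, URI, and credential values below are placeholders.

```python
from pyiceberg.catalog import load_catalog

# Placeholder settings; only the new property spellings are consulted by FsspecFileIO.
catalog = load_catalog(
    "default",
    **{
        "uri": "http://rest-catalog:8181",            # plus your usual catalog settings
        "adls.account-name": "my-storage-account",    # formerly adlfs.account-name
        "adls.account-key": "<account-key>",          # formerly adlfs.account-key
        "gcs.service.host": "http://localhost:4443",  # formerly gcs.endpoint
    },
)
```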
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index ef6937f1bb..d288e4f2f1 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -90,7 +90,6 @@
AWS_SECRET_ACCESS_KEY,
AWS_SESSION_TOKEN,
GCS_DEFAULT_LOCATION,
- GCS_ENDPOINT,
GCS_SERVICE_HOST,
GCS_TOKEN,
GCS_TOKEN_EXPIRES_AT_MS,
@@ -136,6 +135,7 @@
visit,
visit_with_partner,
)
+from pyiceberg.table.locations import load_location_provider
from pyiceberg.table.metadata import TableMetadata
from pyiceberg.table.name_mapping import NameMapping, apply_name_mapping
from pyiceberg.transforms import TruncateTransform
@@ -165,7 +165,6 @@
from pyiceberg.utils.concurrent import ExecutorFactory
from pyiceberg.utils.config import Config
from pyiceberg.utils.datetime import millis_to_datetime
-from pyiceberg.utils.deprecated import deprecation_message
from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int
from pyiceberg.utils.singleton import Singleton
from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
@@ -351,77 +350,135 @@ def parse_location(location: str) -> Tuple[str, str, str]:
return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}"
def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSystem:
- if scheme in {"s3", "s3a", "s3n", "oss"}:
- from pyarrow.fs import S3FileSystem
-
- client_kwargs: Dict[str, Any] = {
- "endpoint_override": self.properties.get(S3_ENDPOINT),
- "access_key": get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
- "secret_key": get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
- "session_token": get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
- "region": get_first_property_value(self.properties, S3_REGION, AWS_REGION),
- }
-
- if proxy_uri := self.properties.get(S3_PROXY_URI):
- client_kwargs["proxy_options"] = proxy_uri
-
- if connect_timeout := self.properties.get(S3_CONNECT_TIMEOUT):
- client_kwargs["connect_timeout"] = float(connect_timeout)
-
- if role_arn := get_first_property_value(self.properties, S3_ROLE_ARN, AWS_ROLE_ARN):
- client_kwargs["role_arn"] = role_arn
-
- if session_name := get_first_property_value(self.properties, S3_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME):
- client_kwargs["session_name"] = session_name
-
- if force_virtual_addressing := self.properties.get(S3_FORCE_VIRTUAL_ADDRESSING):
- client_kwargs["force_virtual_addressing"] = property_as_bool(self.properties, force_virtual_addressing, False)
-
- return S3FileSystem(**client_kwargs)
- elif scheme in ("hdfs", "viewfs"):
- from pyarrow.fs import HadoopFileSystem
-
- hdfs_kwargs: Dict[str, Any] = {}
- if netloc:
- return HadoopFileSystem.from_uri(f"{scheme}://{netloc}")
- if host := self.properties.get(HDFS_HOST):
- hdfs_kwargs["host"] = host
- if port := self.properties.get(HDFS_PORT):
- # port should be an integer type
- hdfs_kwargs["port"] = int(port)
- if user := self.properties.get(HDFS_USER):
- hdfs_kwargs["user"] = user
- if kerb_ticket := self.properties.get(HDFS_KERB_TICKET):
- hdfs_kwargs["kerb_ticket"] = kerb_ticket
-
- return HadoopFileSystem(**hdfs_kwargs)
+ """Initialize FileSystem for different scheme."""
+ if scheme in {"oss"}:
+ return self._initialize_oss_fs()
+
+ elif scheme in {"s3", "s3a", "s3n"}:
+ return self._initialize_s3_fs(netloc)
+
+ elif scheme in {"hdfs", "viewfs"}:
+ return self._initialize_hdfs_fs(scheme, netloc)
+
elif scheme in {"gs", "gcs"}:
- from pyarrow.fs import GcsFileSystem
-
- gcs_kwargs: Dict[str, Any] = {}
- if access_token := self.properties.get(GCS_TOKEN):
- gcs_kwargs["access_token"] = access_token
- if expiration := self.properties.get(GCS_TOKEN_EXPIRES_AT_MS):
- gcs_kwargs["credential_token_expiration"] = millis_to_datetime(int(expiration))
- if bucket_location := self.properties.get(GCS_DEFAULT_LOCATION):
- gcs_kwargs["default_bucket_location"] = bucket_location
- if endpoint := get_first_property_value(self.properties, GCS_SERVICE_HOST, GCS_ENDPOINT):
- if self.properties.get(GCS_ENDPOINT):
- deprecation_message(
- deprecated_in="0.8.0",
- removed_in="0.9.0",
- help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead",
- )
- url_parts = urlparse(endpoint)
- gcs_kwargs["scheme"] = url_parts.scheme
- gcs_kwargs["endpoint_override"] = url_parts.netloc
+ return self._initialize_gcs_fs()
+
+ elif scheme in {"file"}:
+ return self._initialize_local_fs()
- return GcsFileSystem(**gcs_kwargs)
- elif scheme == "file":
- return PyArrowLocalFileSystem()
else:
raise ValueError(f"Unrecognized filesystem type in URI: {scheme}")
+ def _initialize_oss_fs(self) -> FileSystem:
+ from pyarrow.fs import S3FileSystem
+
+ client_kwargs: Dict[str, Any] = {
+ "endpoint_override": self.properties.get(S3_ENDPOINT),
+ "access_key": get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
+ "secret_key": get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
+ "session_token": get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
+ "region": get_first_property_value(self.properties, S3_REGION, AWS_REGION),
+ }
+
+ if proxy_uri := self.properties.get(S3_PROXY_URI):
+ client_kwargs["proxy_options"] = proxy_uri
+
+ if connect_timeout := self.properties.get(S3_CONNECT_TIMEOUT):
+ client_kwargs["connect_timeout"] = float(connect_timeout)
+
+ if role_arn := get_first_property_value(self.properties, S3_ROLE_ARN, AWS_ROLE_ARN):
+ client_kwargs["role_arn"] = role_arn
+
+ if session_name := get_first_property_value(self.properties, S3_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME):
+ client_kwargs["session_name"] = session_name
+
+ if force_virtual_addressing := self.properties.get(S3_FORCE_VIRTUAL_ADDRESSING):
+ client_kwargs["force_virtual_addressing"] = property_as_bool(self.properties, force_virtual_addressing, False)
+
+ return S3FileSystem(**client_kwargs)
+
+ def _initialize_s3_fs(self, netloc: Optional[str]) -> FileSystem:
+ from pyarrow.fs import S3FileSystem, resolve_s3_region
+
+ # Resolve the region from the netloc (bucket), falling back to the user-provided region
+ provided_region = get_first_property_value(self.properties, S3_REGION, AWS_REGION)
+
+ try:
+ bucket_region = resolve_s3_region(bucket=netloc)
+ except (OSError, TypeError):
+ bucket_region = None
+ logger.warning(f"Unable to resolve region for bucket {netloc}, using default region {provided_region}")
+
+ bucket_region = bucket_region or provided_region
+ if bucket_region != provided_region:
+ logger.warning(
+ f"PyArrow FileIO overriding S3 bucket region for bucket {netloc}: "
+ f"provided region {provided_region}, actual region {bucket_region}"
+ )
+
+ client_kwargs: Dict[str, Any] = {
+ "endpoint_override": self.properties.get(S3_ENDPOINT),
+ "access_key": get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
+ "secret_key": get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
+ "session_token": get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
+ "region": bucket_region,
+ }
+
+ if proxy_uri := self.properties.get(S3_PROXY_URI):
+ client_kwargs["proxy_options"] = proxy_uri
+
+ if connect_timeout := self.properties.get(S3_CONNECT_TIMEOUT):
+ client_kwargs["connect_timeout"] = float(connect_timeout)
+
+ if role_arn := get_first_property_value(self.properties, S3_ROLE_ARN, AWS_ROLE_ARN):
+ client_kwargs["role_arn"] = role_arn
+
+ if session_name := get_first_property_value(self.properties, S3_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME):
+ client_kwargs["session_name"] = session_name
+
+ if force_virtual_addressing := self.properties.get(S3_FORCE_VIRTUAL_ADDRESSING):
+ client_kwargs["force_virtual_addressing"] = property_as_bool(self.properties, force_virtual_addressing, False)
+
+ return S3FileSystem(**client_kwargs)
+
+ def _initialize_hdfs_fs(self, scheme: str, netloc: Optional[str]) -> FileSystem:
+ from pyarrow.fs import HadoopFileSystem
+
+ hdfs_kwargs: Dict[str, Any] = {}
+ if netloc:
+ return HadoopFileSystem.from_uri(f"{scheme}://{netloc}")
+ if host := self.properties.get(HDFS_HOST):
+ hdfs_kwargs["host"] = host
+ if port := self.properties.get(HDFS_PORT):
+ # port should be an integer type
+ hdfs_kwargs["port"] = int(port)
+ if user := self.properties.get(HDFS_USER):
+ hdfs_kwargs["user"] = user
+ if kerb_ticket := self.properties.get(HDFS_KERB_TICKET):
+ hdfs_kwargs["kerb_ticket"] = kerb_ticket
+
+ return HadoopFileSystem(**hdfs_kwargs)
+
+ def _initialize_gcs_fs(self) -> FileSystem:
+ from pyarrow.fs import GcsFileSystem
+
+ gcs_kwargs: Dict[str, Any] = {}
+ if access_token := self.properties.get(GCS_TOKEN):
+ gcs_kwargs["access_token"] = access_token
+ if expiration := self.properties.get(GCS_TOKEN_EXPIRES_AT_MS):
+ gcs_kwargs["credential_token_expiration"] = millis_to_datetime(int(expiration))
+ if bucket_location := self.properties.get(GCS_DEFAULT_LOCATION):
+ gcs_kwargs["default_bucket_location"] = bucket_location
+ if endpoint := self.properties.get(GCS_SERVICE_HOST):
+ url_parts = urlparse(endpoint)
+ gcs_kwargs["scheme"] = url_parts.scheme
+ gcs_kwargs["endpoint_override"] = url_parts.netloc
+
+ return GcsFileSystem(**gcs_kwargs)
+
+ def _initialize_local_fs(self) -> FileSystem:
+ return PyArrowLocalFileSystem()
+
def new_input(self, location: str) -> PyArrowFile:
"""Get a PyArrowFile instance to read bytes from the file at the given location.
@@ -1326,13 +1383,14 @@ def _task_to_table(
return None
-def _read_all_delete_files(fs: FileSystem, tasks: Iterable[FileScanTask]) -> Dict[str, List[ChunkedArray]]:
+def _read_all_delete_files(io: FileIO, tasks: Iterable[FileScanTask]) -> Dict[str, List[ChunkedArray]]:
deletes_per_file: Dict[str, List[ChunkedArray]] = {}
unique_deletes = set(itertools.chain.from_iterable([task.delete_files for task in tasks]))
if len(unique_deletes) > 0:
executor = ExecutorFactory.get_or_create()
deletes_per_files: Iterator[Dict[str, ChunkedArray]] = executor.map(
- lambda args: _read_deletes(*args), [(fs, delete) for delete in unique_deletes]
+ lambda args: _read_deletes(*args),
+ [(_fs_from_file_path(io, delete_file.file_path), delete_file) for delete_file in unique_deletes],
)
for delete in deletes_per_files:
for file, arr in delete.items():
@@ -1344,7 +1402,7 @@ def _read_all_delete_files(fs: FileSystem, tasks: Iterable[FileScanTask]) -> Dic
return deletes_per_file
-def _fs_from_file_path(file_path: str, io: FileIO) -> FileSystem:
+def _fs_from_file_path(io: FileIO, file_path: str) -> FileSystem:
scheme, netloc, _ = _parse_location(file_path)
if isinstance(io, PyArrowFileIO):
return io.fs_by_scheme(scheme, netloc)
@@ -1366,7 +1424,6 @@ def _fs_from_file_path(file_path: str, io: FileIO) -> FileSystem:
class ArrowScan:
_table_metadata: TableMetadata
_io: FileIO
- _fs: FileSystem
_projected_schema: Schema
_bound_row_filter: BooleanExpression
_case_sensitive: bool
@@ -1376,7 +1433,6 @@ class ArrowScan:
Attributes:
_table_metadata: Current table metadata of the Iceberg table
_io: PyIceberg FileIO implementation from which to fetch the io properties
- _fs: PyArrow FileSystem to use to read the files
_projected_schema: Iceberg Schema to project onto the data files
_bound_row_filter: Schema bound row expression to filter the data with
_case_sensitive: Case sensitivity when looking up column names
@@ -1394,7 +1450,6 @@ def __init__(
) -> None:
self._table_metadata = table_metadata
self._io = io
- self._fs = _fs_from_file_path(table_metadata.location, io) # TODO: use different FileSystem per file
self._projected_schema = projected_schema
self._bound_row_filter = bind(table_metadata.schema(), row_filter, case_sensitive=case_sensitive)
self._case_sensitive = case_sensitive
@@ -1434,7 +1489,7 @@ def to_table(self, tasks: Iterable[FileScanTask]) -> pa.Table:
ResolveError: When a required field cannot be found in the file
ValueError: When a field type in the file cannot be projected to the schema type
"""
- deletes_per_file = _read_all_delete_files(self._fs, tasks)
+ deletes_per_file = _read_all_delete_files(self._io, tasks)
executor = ExecutorFactory.get_or_create()
def _table_from_scan_task(task: FileScanTask) -> pa.Table:
@@ -1497,7 +1552,7 @@ def to_record_batches(self, tasks: Iterable[FileScanTask]) -> Iterator[pa.Record
ResolveError: When a required field cannot be found in the file
ValueError: When a field type in the file cannot be projected to the schema type
"""
- deletes_per_file = _read_all_delete_files(self._fs, tasks)
+ deletes_per_file = _read_all_delete_files(self._io, tasks)
return self._record_batches_from_scan_tasks_and_deletes(tasks, deletes_per_file)
def _record_batches_from_scan_tasks_and_deletes(
@@ -1508,7 +1563,7 @@ def _record_batches_from_scan_tasks_and_deletes(
if self._limit is not None and total_row_count >= self._limit:
break
batches = _task_to_record_batches(
- self._fs,
+ _fs_from_file_path(self._io, task.file.file_path),
task,
self._bound_row_filter,
self._projected_schema,
@@ -1536,7 +1591,7 @@ def _to_requested_schema(
include_field_ids: bool = False,
use_large_types: bool = True,
) -> pa.RecordBatch:
- # We could re-use some of these visitors
+ # We could reuse some of these visitors
struct_array = visit_with_partner(
requested_schema,
batch,
@@ -2243,6 +2298,7 @@ def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteT
property_name=TableProperties.PARQUET_ROW_GROUP_LIMIT,
default=TableProperties.PARQUET_ROW_GROUP_LIMIT_DEFAULT,
)
+ location_provider = load_location_provider(table_location=table_metadata.location, table_properties=table_metadata.properties)
def write_parquet(task: WriteTask) -> DataFile:
table_schema = table_metadata.schema()
@@ -2265,7 +2321,10 @@ def write_parquet(task: WriteTask) -> DataFile:
for batch in task.record_batches
]
arrow_table = pa.Table.from_batches(batches)
- file_path = f"{table_metadata.location}/data/{task.generate_data_file_path('parquet')}"
+ file_path = location_provider.new_data_location(
+ data_file_name=task.generate_data_file_filename("parquet"),
+ partition_key=task.partition_key,
+ )
fo = io.new_output(file_path)
with fo.create(overwrite=True) as fos:
with pq.ParquetWriter(fos, schema=arrow_table.schema, **parquet_writer_kwargs) as writer:
@@ -2449,27 +2508,31 @@ def _dataframe_to_data_files(
yield from write_file(
io=io,
table_metadata=table_metadata,
- tasks=iter([
- WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=task_schema)
- for batches in bin_pack_arrow_table(df, target_file_size)
- ]),
+ tasks=iter(
+ [
+ WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=task_schema)
+ for batches in bin_pack_arrow_table(df, target_file_size)
+ ]
+ ),
)
else:
partitions = _determine_partitions(spec=table_metadata.spec(), schema=table_metadata.schema(), arrow_table=df)
yield from write_file(
io=io,
table_metadata=table_metadata,
- tasks=iter([
- WriteTask(
- write_uuid=write_uuid,
- task_id=next(counter),
- record_batches=batches,
- partition_key=partition.partition_key,
- schema=task_schema,
- )
- for partition in partitions
- for batches in bin_pack_arrow_table(partition.arrow_table_partition, target_file_size)
- ]),
+ tasks=iter(
+ [
+ WriteTask(
+ write_uuid=write_uuid,
+ task_id=next(counter),
+ record_batches=batches,
+ partition_key=partition.partition_key,
+ schema=task_schema,
+ )
+ for partition in partitions
+ for batches in bin_pack_arrow_table(partition.arrow_table_partition, target_file_size)
+ ]
+ ),
)
@@ -2534,10 +2597,12 @@ def _determine_partitions(spec: PartitionSpec, schema: Schema, arrow_table: pa.T
partition_columns: List[Tuple[PartitionField, NestedField]] = [
(partition_field, schema.find_field(partition_field.source_id)) for partition_field in spec.fields
]
- partition_values_table = pa.table({
- str(partition.field_id): partition.transform.pyarrow_transform(field.field_type)(arrow_table[field.name])
- for partition, field in partition_columns
- })
+ partition_values_table = pa.table(
+ {
+ str(partition.field_id): partition.transform.pyarrow_transform(field.field_type)(arrow_table[field.name])
+ for partition, field in partition_columns
+ }
+ )
# Sort by partitions
sort_indices = pa.compute.sort_indices(
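
The S3 path now resolves the bucket's region up front and only keeps the configured region when resolution fails. A rough sketch of that fallback, under the same assumptions as `_initialize_s3_fs` (the helper name is invented; `resolve_s3_region` is the PyArrow call imported above):

```python
from typing import Optional

from pyarrow.fs import resolve_s3_region


def effective_s3_region(bucket: Optional[str], provided_region: Optional[str]) -> Optional[str]:
    """Prefer the bucket's actual region, falling back to the configured one."""
    try:
        bucket_region = resolve_s3_region(bucket=bucket)
    except (OSError, TypeError):
        bucket_region = None  # e.g. bucket is None or cannot be reached
    if bucket_region and bucket_region != provided_region:
        print(f"Overriding provided region {provided_region} with {bucket_region}")
    return bucket_region or provided_region
```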
diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py
index a56da5fc05..5a32a6330c 100644
--- a/pyiceberg/manifest.py
+++ b/pyiceberg/manifest.py
@@ -292,28 +292,32 @@ def __repr__(self) -> str:
def data_file_with_partition(partition_type: StructType, format_version: TableVersion) -> StructType:
- data_file_partition_type = StructType(*[
- NestedField(
- field_id=field.field_id,
- name=field.name,
- field_type=field.field_type,
- required=field.required,
- )
- for field in partition_type.fields
- ])
+ data_file_partition_type = StructType(
+ *[
+ NestedField(
+ field_id=field.field_id,
+ name=field.name,
+ field_type=field.field_type,
+ required=field.required,
+ )
+ for field in partition_type.fields
+ ]
+ )
- return StructType(*[
- NestedField(
- field_id=102,
- name="partition",
- field_type=data_file_partition_type,
- required=True,
- doc="Partition data tuple, schema based on the partition spec",
- )
- if field.field_id == 102
- else field
- for field in DATA_FILE_TYPE[format_version].fields
- ])
+ return StructType(
+ *[
+ NestedField(
+ field_id=102,
+ name="partition",
+ field_type=data_file_partition_type,
+ required=True,
+ doc="Partition data tuple, schema based on the partition spec",
+ )
+ if field.field_id == 102
+ else field
+ for field in DATA_FILE_TYPE[format_version].fields
+ ]
+ )
class DataFile(Record):
@@ -398,10 +402,12 @@ def __eq__(self, other: Any) -> bool:
def manifest_entry_schema_with_data_file(format_version: TableVersion, data_file: StructType) -> Schema:
- return Schema(*[
- NestedField(2, "data_file", data_file, required=True) if field.field_id == 2 else field
- for field in MANIFEST_ENTRY_SCHEMAS[format_version].fields
- ])
+ return Schema(
+ *[
+ NestedField(2, "data_file", data_file, required=True) if field.field_id == 2 else field
+ for field in MANIFEST_ENTRY_SCHEMAS[format_version].fields
+ ]
+ )
class ManifestEntry(Record):
diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py
index 5f9178ebf9..1813772217 100644
--- a/pyiceberg/partitioning.py
+++ b/pyiceberg/partitioning.py
@@ -30,7 +30,7 @@
Tuple,
TypeVar,
)
-from urllib.parse import quote
+from urllib.parse import quote_plus
from pydantic import (
BeforeValidator,
@@ -234,9 +234,8 @@ def partition_to_path(self, data: Record, schema: Schema) -> str:
partition_field = self.fields[pos]
value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=data[pos])
- value_str = quote(value_str, safe="")
- value_strs.append(value_str)
- field_strs.append(partition_field.name)
+ value_strs.append(quote_plus(value_str, safe=""))
+ field_strs.append(quote_plus(partition_field.name, safe=""))
path = "/".join([field_str + "=" + value_str for field_str, value_str in zip(field_strs, value_strs)])
return path
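
Since both the partition field name and the human-readable value are now escaped with `quote_plus`, special characters can no longer corrupt the `field=value` path segments. A quick illustration (the field name and value are made up):

```python
from urllib.parse import quote_plus

field_name, value = "event type", "a/b c"
segment = f"{quote_plus(field_name, safe='')}={quote_plus(value, safe='')}"
print(segment)  # event+type=a%2Fb+c
```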
diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py
index cfe3fe3a7b..5a373cb15f 100644
--- a/pyiceberg/schema.py
+++ b/pyiceberg/schema.py
@@ -1707,12 +1707,14 @@ def list(self, list_type: ListType, element_result: Callable[[], bool]) -> bool:
return self._is_field_compatible(list_type.element_field) and element_result()
def map(self, map_type: MapType, key_result: Callable[[], bool], value_result: Callable[[], bool]) -> bool:
- return all([
- self._is_field_compatible(map_type.key_field),
- self._is_field_compatible(map_type.value_field),
- key_result(),
- value_result(),
- ])
+ return all(
+ [
+ self._is_field_compatible(map_type.key_field),
+ self._is_field_compatible(map_type.value_field),
+ key_result(),
+ value_result(),
+ ]
+ )
def primitive(self, primitive: PrimitiveType) -> bool:
return True
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 4ec3403bb3..f2df84d7ee 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -187,6 +187,14 @@ class TableProperties:
WRITE_PARTITION_SUMMARY_LIMIT = "write.summary.partition-limit"
WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT = 0
+ WRITE_PY_LOCATION_PROVIDER_IMPL = "write.py-location-provider.impl"
+
+ OBJECT_STORE_ENABLED = "write.object-storage.enabled"
+ OBJECT_STORE_ENABLED_DEFAULT = True
+
+ WRITE_OBJECT_STORE_PARTITIONED_PATHS = "write.object-storage.partitioned-paths"
+ WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT = True
+
DELETE_MODE = "write.delete.mode"
DELETE_MODE_COPY_ON_WRITE = "copy-on-write"
DELETE_MODE_MERGE_ON_READ = "merge-on-read"
@@ -629,18 +637,20 @@ def delete(
if len(filtered_df) == 0:
replaced_files.append((original_file.file, []))
elif len(df) != len(filtered_df):
- replaced_files.append((
- original_file.file,
- list(
- _dataframe_to_data_files(
- io=self._table.io,
- df=filtered_df,
- table_metadata=self.table_metadata,
- write_uuid=commit_uuid,
- counter=counter,
- )
- ),
- ))
+ replaced_files.append(
+ (
+ original_file.file,
+ list(
+ _dataframe_to_data_files(
+ io=self._table.io,
+ df=filtered_df,
+ table_metadata=self.table_metadata,
+ write_uuid=commit_uuid,
+ counter=counter,
+ )
+ ),
+ )
+ )
if len(replaced_files) > 0:
with self.update_snapshot(snapshot_properties=snapshot_properties).overwrite() as overwrite_snapshot:
@@ -680,9 +690,9 @@ def add_files(
raise ValueError(f"Cannot add files that are already referenced by table, files: {', '.join(referenced_files)}")
if self.table_metadata.name_mapping() is None:
- self.set_properties(**{
- TableProperties.DEFAULT_NAME_MAPPING: self.table_metadata.schema().name_mapping.model_dump_json()
- })
+ self.set_properties(
+ **{TableProperties.DEFAULT_NAME_MAPPING: self.table_metadata.schema().name_mapping.model_dump_json()}
+ )
with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
data_files = _parquet_files_to_data_files(
table_metadata=self.table_metadata, file_paths=file_paths, io=self._table.io
@@ -902,7 +912,7 @@ def scan(
Args:
row_filter:
- A string or BooleanExpression that decsribes the
+ A string or BooleanExpression that describes the
desired rows
selected_fields:
A tuple of strings representing the column names
@@ -1611,13 +1621,6 @@ def generate_data_file_filename(self, extension: str) -> str:
# https://github.com/apache/iceberg/blob/a582968975dd30ff4917fbbe999f1be903efac02/core/src/main/java/org/apache/iceberg/io/OutputFileFactory.java#L92-L101
return f"00000-{self.task_id}-{self.write_uuid}.{extension}"
- def generate_data_file_path(self, extension: str) -> str:
- if self.partition_key:
- file_path = f"{self.partition_key.to_path()}/{self.generate_data_file_filename(extension)}"
- return file_path
- else:
- return self.generate_data_file_filename(extension)
-
@dataclass(frozen=True)
class AddFileTask:
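
The new write settings are plain table properties, so they can be toggled per table. A hedged example, assuming `table` is an already-loaded `Table`; the property names are the ones defined in `TableProperties` above:

```python
with table.transaction() as tx:
    # Opt out of the hashed object-storage layout, keep partition values in paths.
    tx.set_properties(**{
        "write.object-storage.enabled": "false",
        "write.object-storage.partitioned-paths": "true",
    })
```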
diff --git a/pyiceberg/table/inspect.py b/pyiceberg/table/inspect.py
index beee426533..6dfa78a7ac 100644
--- a/pyiceberg/table/inspect.py
+++ b/pyiceberg/table/inspect.py
@@ -17,13 +17,14 @@
from __future__ import annotations
from datetime import datetime, timezone
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple
+from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Set, Tuple
from pyiceberg.conversions import from_bytes
from pyiceberg.manifest import DataFile, DataFileContent, ManifestContent, PartitionFieldSummary
from pyiceberg.partitioning import PartitionSpec
from pyiceberg.table.snapshots import Snapshot, ancestors_of
from pyiceberg.types import PrimitiveType
+from pyiceberg.utils.concurrent import ExecutorFactory
from pyiceberg.utils.singleton import _convert_to_hashable_type
if TYPE_CHECKING:
@@ -58,14 +59,16 @@ def _get_snapshot(self, snapshot_id: Optional[int] = None) -> Snapshot:
def snapshots(self) -> "pa.Table":
import pyarrow as pa
- snapshots_schema = pa.schema([
- pa.field("committed_at", pa.timestamp(unit="ms"), nullable=False),
- pa.field("snapshot_id", pa.int64(), nullable=False),
- pa.field("parent_id", pa.int64(), nullable=True),
- pa.field("operation", pa.string(), nullable=True),
- pa.field("manifest_list", pa.string(), nullable=False),
- pa.field("summary", pa.map_(pa.string(), pa.string()), nullable=True),
- ])
+ snapshots_schema = pa.schema(
+ [
+ pa.field("committed_at", pa.timestamp(unit="ms"), nullable=False),
+ pa.field("snapshot_id", pa.int64(), nullable=False),
+ pa.field("parent_id", pa.int64(), nullable=True),
+ pa.field("operation", pa.string(), nullable=True),
+ pa.field("manifest_list", pa.string(), nullable=False),
+ pa.field("summary", pa.map_(pa.string(), pa.string()), nullable=True),
+ ]
+ )
snapshots = []
for snapshot in self.tbl.metadata.snapshots:
if summary := snapshot.summary:
@@ -75,14 +78,16 @@ def snapshots(self) -> "pa.Table":
operation = None
additional_properties = None
- snapshots.append({
- "committed_at": datetime.fromtimestamp(snapshot.timestamp_ms / 1000.0, tz=timezone.utc),
- "snapshot_id": snapshot.snapshot_id,
- "parent_id": snapshot.parent_snapshot_id,
- "operation": str(operation),
- "manifest_list": snapshot.manifest_list,
- "summary": additional_properties,
- })
+ snapshots.append(
+ {
+ "committed_at": datetime.fromtimestamp(snapshot.timestamp_ms / 1000.0, tz=timezone.utc),
+ "snapshot_id": snapshot.snapshot_id,
+ "parent_id": snapshot.parent_snapshot_id,
+ "operation": str(operation),
+ "manifest_list": snapshot.manifest_list,
+ "summary": additional_properties,
+ }
+ )
return pa.Table.from_pylist(
snapshots,
@@ -100,14 +105,16 @@ def entries(self, snapshot_id: Optional[int] = None) -> "pa.Table":
def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
pa_bound_type = schema_to_pyarrow(bound_type)
- return pa.struct([
- pa.field("column_size", pa.int64(), nullable=True),
- pa.field("value_count", pa.int64(), nullable=True),
- pa.field("null_value_count", pa.int64(), nullable=True),
- pa.field("nan_value_count", pa.int64(), nullable=True),
- pa.field("lower_bound", pa_bound_type, nullable=True),
- pa.field("upper_bound", pa_bound_type, nullable=True),
- ])
+ return pa.struct(
+ [
+ pa.field("column_size", pa.int64(), nullable=True),
+ pa.field("value_count", pa.int64(), nullable=True),
+ pa.field("null_value_count", pa.int64(), nullable=True),
+ pa.field("nan_value_count", pa.int64(), nullable=True),
+ pa.field("lower_bound", pa_bound_type, nullable=True),
+ pa.field("upper_bound", pa_bound_type, nullable=True),
+ ]
+ )
for field in self.tbl.metadata.schema().fields:
readable_metrics_struct.append(
@@ -117,35 +124,39 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
partition_record = self.tbl.metadata.specs_struct()
pa_record_struct = schema_to_pyarrow(partition_record)
- entries_schema = pa.schema([
- pa.field("status", pa.int8(), nullable=False),
- pa.field("snapshot_id", pa.int64(), nullable=False),
- pa.field("sequence_number", pa.int64(), nullable=False),
- pa.field("file_sequence_number", pa.int64(), nullable=False),
- pa.field(
- "data_file",
- pa.struct([
- pa.field("content", pa.int8(), nullable=False),
- pa.field("file_path", pa.string(), nullable=False),
- pa.field("file_format", pa.string(), nullable=False),
- pa.field("partition", pa_record_struct, nullable=False),
- pa.field("record_count", pa.int64(), nullable=False),
- pa.field("file_size_in_bytes", pa.int64(), nullable=False),
- pa.field("column_sizes", pa.map_(pa.int32(), pa.int64()), nullable=True),
- pa.field("value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True),
- pa.field("null_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True),
- pa.field("nan_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True),
- pa.field("lower_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True),
- pa.field("upper_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True),
- pa.field("key_metadata", pa.binary(), nullable=True),
- pa.field("split_offsets", pa.list_(pa.int64()), nullable=True),
- pa.field("equality_ids", pa.list_(pa.int32()), nullable=True),
- pa.field("sort_order_id", pa.int32(), nullable=True),
- ]),
- nullable=False,
- ),
- pa.field("readable_metrics", pa.struct(readable_metrics_struct), nullable=True),
- ])
+ entries_schema = pa.schema(
+ [
+ pa.field("status", pa.int8(), nullable=False),
+ pa.field("snapshot_id", pa.int64(), nullable=False),
+ pa.field("sequence_number", pa.int64(), nullable=False),
+ pa.field("file_sequence_number", pa.int64(), nullable=False),
+ pa.field(
+ "data_file",
+ pa.struct(
+ [
+ pa.field("content", pa.int8(), nullable=False),
+ pa.field("file_path", pa.string(), nullable=False),
+ pa.field("file_format", pa.string(), nullable=False),
+ pa.field("partition", pa_record_struct, nullable=False),
+ pa.field("record_count", pa.int64(), nullable=False),
+ pa.field("file_size_in_bytes", pa.int64(), nullable=False),
+ pa.field("column_sizes", pa.map_(pa.int32(), pa.int64()), nullable=True),
+ pa.field("value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True),
+ pa.field("null_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True),
+ pa.field("nan_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True),
+ pa.field("lower_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True),
+ pa.field("upper_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True),
+ pa.field("key_metadata", pa.binary(), nullable=True),
+ pa.field("split_offsets", pa.list_(pa.int64()), nullable=True),
+ pa.field("equality_ids", pa.list_(pa.int32()), nullable=True),
+ pa.field("sort_order_id", pa.int32(), nullable=True),
+ ]
+ ),
+ nullable=False,
+ ),
+ pa.field("readable_metrics", pa.struct(readable_metrics_struct), nullable=True),
+ ]
+ )
entries = []
snapshot = self._get_snapshot(snapshot_id)
@@ -180,32 +191,34 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
for pos, field in enumerate(self.tbl.metadata.specs()[manifest.partition_spec_id].fields)
}
- entries.append({
- "status": entry.status.value,
- "snapshot_id": entry.snapshot_id,
- "sequence_number": entry.sequence_number,
- "file_sequence_number": entry.file_sequence_number,
- "data_file": {
- "content": entry.data_file.content,
- "file_path": entry.data_file.file_path,
- "file_format": entry.data_file.file_format,
- "partition": partition_record_dict,
- "record_count": entry.data_file.record_count,
- "file_size_in_bytes": entry.data_file.file_size_in_bytes,
- "column_sizes": dict(entry.data_file.column_sizes),
- "value_counts": dict(entry.data_file.value_counts),
- "null_value_counts": dict(entry.data_file.null_value_counts),
- "nan_value_counts": entry.data_file.nan_value_counts,
- "lower_bounds": entry.data_file.lower_bounds,
- "upper_bounds": entry.data_file.upper_bounds,
- "key_metadata": entry.data_file.key_metadata,
- "split_offsets": entry.data_file.split_offsets,
- "equality_ids": entry.data_file.equality_ids,
- "sort_order_id": entry.data_file.sort_order_id,
- "spec_id": entry.data_file.spec_id,
- },
- "readable_metrics": readable_metrics,
- })
+ entries.append(
+ {
+ "status": entry.status.value,
+ "snapshot_id": entry.snapshot_id,
+ "sequence_number": entry.sequence_number,
+ "file_sequence_number": entry.file_sequence_number,
+ "data_file": {
+ "content": entry.data_file.content,
+ "file_path": entry.data_file.file_path,
+ "file_format": entry.data_file.file_format,
+ "partition": partition_record_dict,
+ "record_count": entry.data_file.record_count,
+ "file_size_in_bytes": entry.data_file.file_size_in_bytes,
+ "column_sizes": dict(entry.data_file.column_sizes),
+ "value_counts": dict(entry.data_file.value_counts),
+ "null_value_counts": dict(entry.data_file.null_value_counts),
+ "nan_value_counts": entry.data_file.nan_value_counts,
+ "lower_bounds": entry.data_file.lower_bounds,
+ "upper_bounds": entry.data_file.upper_bounds,
+ "key_metadata": entry.data_file.key_metadata,
+ "split_offsets": entry.data_file.split_offsets,
+ "equality_ids": entry.data_file.equality_ids,
+ "sort_order_id": entry.data_file.sort_order_id,
+ "spec_id": entry.data_file.spec_id,
+ },
+ "readable_metrics": readable_metrics,
+ }
+ )
return pa.Table.from_pylist(
entries,
@@ -215,26 +228,30 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
def refs(self) -> "pa.Table":
import pyarrow as pa
- ref_schema = pa.schema([
- pa.field("name", pa.string(), nullable=False),
- pa.field("type", pa.dictionary(pa.int32(), pa.string()), nullable=False),
- pa.field("snapshot_id", pa.int64(), nullable=False),
- pa.field("max_reference_age_in_ms", pa.int64(), nullable=True),
- pa.field("min_snapshots_to_keep", pa.int32(), nullable=True),
- pa.field("max_snapshot_age_in_ms", pa.int64(), nullable=True),
- ])
+ ref_schema = pa.schema(
+ [
+ pa.field("name", pa.string(), nullable=False),
+ pa.field("type", pa.dictionary(pa.int32(), pa.string()), nullable=False),
+ pa.field("snapshot_id", pa.int64(), nullable=False),
+ pa.field("max_reference_age_in_ms", pa.int64(), nullable=True),
+ pa.field("min_snapshots_to_keep", pa.int32(), nullable=True),
+ pa.field("max_snapshot_age_in_ms", pa.int64(), nullable=True),
+ ]
+ )
ref_results = []
for ref in self.tbl.metadata.refs:
if snapshot_ref := self.tbl.metadata.refs.get(ref):
- ref_results.append({
- "name": ref,
- "type": snapshot_ref.snapshot_ref_type.upper(),
- "snapshot_id": snapshot_ref.snapshot_id,
- "max_reference_age_in_ms": snapshot_ref.max_ref_age_ms,
- "min_snapshots_to_keep": snapshot_ref.min_snapshots_to_keep,
- "max_snapshot_age_in_ms": snapshot_ref.max_snapshot_age_ms,
- })
+ ref_results.append(
+ {
+ "name": ref,
+ "type": snapshot_ref.snapshot_ref_type.upper(),
+ "snapshot_id": snapshot_ref.snapshot_id,
+ "max_reference_age_in_ms": snapshot_ref.max_ref_age_ms,
+ "min_snapshots_to_keep": snapshot_ref.min_snapshots_to_keep,
+ "max_snapshot_age_in_ms": snapshot_ref.max_snapshot_age_ms,
+ }
+ )
return pa.Table.from_pylist(ref_results, schema=ref_schema)
@@ -243,27 +260,31 @@ def partitions(self, snapshot_id: Optional[int] = None) -> "pa.Table":
from pyiceberg.io.pyarrow import schema_to_pyarrow
- table_schema = pa.schema([
- pa.field("record_count", pa.int64(), nullable=False),
- pa.field("file_count", pa.int32(), nullable=False),
- pa.field("total_data_file_size_in_bytes", pa.int64(), nullable=False),
- pa.field("position_delete_record_count", pa.int64(), nullable=False),
- pa.field("position_delete_file_count", pa.int32(), nullable=False),
- pa.field("equality_delete_record_count", pa.int64(), nullable=False),
- pa.field("equality_delete_file_count", pa.int32(), nullable=False),
- pa.field("last_updated_at", pa.timestamp(unit="ms"), nullable=True),
- pa.field("last_updated_snapshot_id", pa.int64(), nullable=True),
- ])
+ table_schema = pa.schema(
+ [
+ pa.field("record_count", pa.int64(), nullable=False),
+ pa.field("file_count", pa.int32(), nullable=False),
+ pa.field("total_data_file_size_in_bytes", pa.int64(), nullable=False),
+ pa.field("position_delete_record_count", pa.int64(), nullable=False),
+ pa.field("position_delete_file_count", pa.int32(), nullable=False),
+ pa.field("equality_delete_record_count", pa.int64(), nullable=False),
+ pa.field("equality_delete_file_count", pa.int32(), nullable=False),
+ pa.field("last_updated_at", pa.timestamp(unit="ms"), nullable=True),
+ pa.field("last_updated_snapshot_id", pa.int64(), nullable=True),
+ ]
+ )
partition_record = self.tbl.metadata.specs_struct()
has_partitions = len(partition_record.fields) > 0
if has_partitions:
pa_record_struct = schema_to_pyarrow(partition_record)
- partitions_schema = pa.schema([
- pa.field("partition", pa_record_struct, nullable=False),
- pa.field("spec_id", pa.int32(), nullable=False),
- ])
+ partitions_schema = pa.schema(
+ [
+ pa.field("partition", pa_record_struct, nullable=False),
+ pa.field("spec_id", pa.int32(), nullable=False),
+ ]
+ )
table_schema = pa.unify_schemas([partitions_schema, table_schema])
@@ -326,30 +347,45 @@ def update_partitions_map(
schema=table_schema,
)
- def manifests(self) -> "pa.Table":
+ def _get_manifests_schema(self) -> "pa.Schema":
import pyarrow as pa
- partition_summary_schema = pa.struct([
- pa.field("contains_null", pa.bool_(), nullable=False),
- pa.field("contains_nan", pa.bool_(), nullable=True),
- pa.field("lower_bound", pa.string(), nullable=True),
- pa.field("upper_bound", pa.string(), nullable=True),
- ])
-
- manifest_schema = pa.schema([
- pa.field("content", pa.int8(), nullable=False),
- pa.field("path", pa.string(), nullable=False),
- pa.field("length", pa.int64(), nullable=False),
- pa.field("partition_spec_id", pa.int32(), nullable=False),
- pa.field("added_snapshot_id", pa.int64(), nullable=False),
- pa.field("added_data_files_count", pa.int32(), nullable=False),
- pa.field("existing_data_files_count", pa.int32(), nullable=False),
- pa.field("deleted_data_files_count", pa.int32(), nullable=False),
- pa.field("added_delete_files_count", pa.int32(), nullable=False),
- pa.field("existing_delete_files_count", pa.int32(), nullable=False),
- pa.field("deleted_delete_files_count", pa.int32(), nullable=False),
- pa.field("partition_summaries", pa.list_(partition_summary_schema), nullable=False),
- ])
+ partition_summary_schema = pa.struct(
+ [
+ pa.field("contains_null", pa.bool_(), nullable=False),
+ pa.field("contains_nan", pa.bool_(), nullable=True),
+ pa.field("lower_bound", pa.string(), nullable=True),
+ pa.field("upper_bound", pa.string(), nullable=True),
+ ]
+ )
+
+ manifest_schema = pa.schema(
+ [
+ pa.field("content", pa.int8(), nullable=False),
+ pa.field("path", pa.string(), nullable=False),
+ pa.field("length", pa.int64(), nullable=False),
+ pa.field("partition_spec_id", pa.int32(), nullable=False),
+ pa.field("added_snapshot_id", pa.int64(), nullable=False),
+ pa.field("added_data_files_count", pa.int32(), nullable=False),
+ pa.field("existing_data_files_count", pa.int32(), nullable=False),
+ pa.field("deleted_data_files_count", pa.int32(), nullable=False),
+ pa.field("added_delete_files_count", pa.int32(), nullable=False),
+ pa.field("existing_delete_files_count", pa.int32(), nullable=False),
+ pa.field("deleted_delete_files_count", pa.int32(), nullable=False),
+ pa.field("partition_summaries", pa.list_(partition_summary_schema), nullable=False),
+ ]
+ )
+ return manifest_schema
+
+ def _get_all_manifests_schema(self) -> "pa.Schema":
+ import pyarrow as pa
+
+ all_manifests_schema = self._get_manifests_schema()
+ all_manifests_schema = all_manifests_schema.append(pa.field("reference_snapshot_id", pa.int64(), nullable=False))
+ return all_manifests_schema
+
+ def _generate_manifests_table(self, snapshot: Optional[Snapshot], is_all_manifests_table: bool = False) -> "pa.Table":
+ import pyarrow as pa
def _partition_summaries_to_rows(
spec: PartitionSpec, partition_summaries: List[PartitionFieldSummary]
@@ -376,21 +412,23 @@ def _partition_summaries_to_rows(
if field_summary.upper_bound
else None
)
- rows.append({
- "contains_null": field_summary.contains_null,
- "contains_nan": field_summary.contains_nan,
- "lower_bound": lower_bound,
- "upper_bound": upper_bound,
- })
+ rows.append(
+ {
+ "contains_null": field_summary.contains_null,
+ "contains_nan": field_summary.contains_nan,
+ "lower_bound": lower_bound,
+ "upper_bound": upper_bound,
+ }
+ )
return rows
specs = self.tbl.metadata.specs()
manifests = []
- if snapshot := self.tbl.metadata.current_snapshot():
+ if snapshot:
for manifest in snapshot.manifests(self.tbl.io):
is_data_file = manifest.content == ManifestContent.DATA
is_delete_file = manifest.content == ManifestContent.DELETES
- manifests.append({
+ manifest_row = {
"content": manifest.content,
"path": manifest.manifest_path,
"length": manifest.manifest_length,
@@ -405,25 +443,33 @@ def _partition_summaries_to_rows(
"partition_summaries": _partition_summaries_to_rows(specs[manifest.partition_spec_id], manifest.partitions)
if manifest.partitions
else [],
- })
+ }
+ if is_all_manifests_table:
+ manifest_row["reference_snapshot_id"] = snapshot.snapshot_id
+ manifests.append(manifest_row)
return pa.Table.from_pylist(
manifests,
- schema=manifest_schema,
+ schema=self._get_all_manifests_schema() if is_all_manifests_table else self._get_manifests_schema(),
)
+ def manifests(self) -> "pa.Table":
+ return self._generate_manifests_table(self.tbl.current_snapshot())
+
def metadata_log_entries(self) -> "pa.Table":
import pyarrow as pa
from pyiceberg.table.snapshots import MetadataLogEntry
- table_schema = pa.schema([
- pa.field("timestamp", pa.timestamp(unit="ms"), nullable=False),
- pa.field("file", pa.string(), nullable=False),
- pa.field("latest_snapshot_id", pa.int64(), nullable=True),
- pa.field("latest_schema_id", pa.int32(), nullable=True),
- pa.field("latest_sequence_number", pa.int64(), nullable=True),
- ])
+ table_schema = pa.schema(
+ [
+ pa.field("timestamp", pa.timestamp(unit="ms"), nullable=False),
+ pa.field("file", pa.string(), nullable=False),
+ pa.field("latest_snapshot_id", pa.int64(), nullable=True),
+ pa.field("latest_schema_id", pa.int32(), nullable=True),
+ pa.field("latest_sequence_number", pa.int64(), nullable=True),
+ ]
+ )
def metadata_log_entry_to_row(metadata_entry: MetadataLogEntry) -> Dict[str, Any]:
latest_snapshot = self.tbl.snapshot_as_of_timestamp(metadata_entry.timestamp_ms)
@@ -449,12 +495,14 @@ def metadata_log_entry_to_row(metadata_entry: MetadataLogEntry) -> Dict[str, Any
def history(self) -> "pa.Table":
import pyarrow as pa
- history_schema = pa.schema([
- pa.field("made_current_at", pa.timestamp(unit="ms"), nullable=False),
- pa.field("snapshot_id", pa.int64(), nullable=False),
- pa.field("parent_id", pa.int64(), nullable=True),
- pa.field("is_current_ancestor", pa.bool_(), nullable=False),
- ])
+ history_schema = pa.schema(
+ [
+ pa.field("made_current_at", pa.timestamp(unit="ms"), nullable=False),
+ pa.field("snapshot_id", pa.int64(), nullable=False),
+ pa.field("parent_id", pa.int64(), nullable=True),
+ pa.field("is_current_ancestor", pa.bool_(), nullable=False),
+ ]
+ )
ancestors_ids = {snapshot.snapshot_id for snapshot in ancestors_of(self.tbl.current_snapshot(), self.tbl.metadata)}
@@ -464,12 +512,14 @@ def history(self) -> "pa.Table":
for snapshot_entry in metadata.snapshot_log:
snapshot = metadata.snapshot_by_id(snapshot_entry.snapshot_id)
- history.append({
- "made_current_at": datetime.fromtimestamp(snapshot_entry.timestamp_ms / 1000.0, tz=timezone.utc),
- "snapshot_id": snapshot_entry.snapshot_id,
- "parent_id": snapshot.parent_snapshot_id if snapshot else None,
- "is_current_ancestor": snapshot_entry.snapshot_id in ancestors_ids,
- })
+ history.append(
+ {
+ "made_current_at": datetime.fromtimestamp(snapshot_entry.timestamp_ms / 1000.0, tz=timezone.utc),
+ "snapshot_id": snapshot_entry.snapshot_id,
+ "parent_id": snapshot.parent_snapshot_id if snapshot else None,
+ "is_current_ancestor": snapshot_entry.snapshot_id in ancestors_ids,
+ }
+ )
return pa.Table.from_pylist(history, schema=history_schema)
@@ -483,39 +533,43 @@ def _files(self, snapshot_id: Optional[int] = None, data_file_filter: Optional[S
def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
pa_bound_type = schema_to_pyarrow(bound_type)
- return pa.struct([
- pa.field("column_size", pa.int64(), nullable=True),
- pa.field("value_count", pa.int64(), nullable=True),
- pa.field("null_value_count", pa.int64(), nullable=True),
- pa.field("nan_value_count", pa.int64(), nullable=True),
- pa.field("lower_bound", pa_bound_type, nullable=True),
- pa.field("upper_bound", pa_bound_type, nullable=True),
- ])
+ return pa.struct(
+ [
+ pa.field("column_size", pa.int64(), nullable=True),
+ pa.field("value_count", pa.int64(), nullable=True),
+ pa.field("null_value_count", pa.int64(), nullable=True),
+ pa.field("nan_value_count", pa.int64(), nullable=True),
+ pa.field("lower_bound", pa_bound_type, nullable=True),
+ pa.field("upper_bound", pa_bound_type, nullable=True),
+ ]
+ )
for field in self.tbl.metadata.schema().fields:
readable_metrics_struct.append(
pa.field(schema.find_column_name(field.field_id), _readable_metrics_struct(field.field_type), nullable=False)
)
- files_schema = pa.schema([
- pa.field("content", pa.int8(), nullable=False),
- pa.field("file_path", pa.string(), nullable=False),
- pa.field("file_format", pa.dictionary(pa.int32(), pa.string()), nullable=False),
- pa.field("spec_id", pa.int32(), nullable=False),
- pa.field("record_count", pa.int64(), nullable=False),
- pa.field("file_size_in_bytes", pa.int64(), nullable=False),
- pa.field("column_sizes", pa.map_(pa.int32(), pa.int64()), nullable=True),
- pa.field("value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True),
- pa.field("null_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True),
- pa.field("nan_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True),
- pa.field("lower_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True),
- pa.field("upper_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True),
- pa.field("key_metadata", pa.binary(), nullable=True),
- pa.field("split_offsets", pa.list_(pa.int64()), nullable=True),
- pa.field("equality_ids", pa.list_(pa.int32()), nullable=True),
- pa.field("sort_order_id", pa.int32(), nullable=True),
- pa.field("readable_metrics", pa.struct(readable_metrics_struct), nullable=True),
- ])
+ files_schema = pa.schema(
+ [
+ pa.field("content", pa.int8(), nullable=False),
+ pa.field("file_path", pa.string(), nullable=False),
+ pa.field("file_format", pa.dictionary(pa.int32(), pa.string()), nullable=False),
+ pa.field("spec_id", pa.int32(), nullable=False),
+ pa.field("record_count", pa.int64(), nullable=False),
+ pa.field("file_size_in_bytes", pa.int64(), nullable=False),
+ pa.field("column_sizes", pa.map_(pa.int32(), pa.int64()), nullable=True),
+ pa.field("value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True),
+ pa.field("null_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True),
+ pa.field("nan_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True),
+ pa.field("lower_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True),
+ pa.field("upper_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True),
+ pa.field("key_metadata", pa.binary(), nullable=True),
+ pa.field("split_offsets", pa.list_(pa.int64()), nullable=True),
+ pa.field("equality_ids", pa.list_(pa.int32()), nullable=True),
+ pa.field("sort_order_id", pa.int32(), nullable=True),
+ pa.field("readable_metrics", pa.struct(readable_metrics_struct), nullable=True),
+ ]
+ )
files: list[dict[str, Any]] = []
@@ -553,25 +607,29 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
}
for field in self.tbl.metadata.schema().fields
}
- files.append({
- "content": data_file.content,
- "file_path": data_file.file_path,
- "file_format": data_file.file_format,
- "spec_id": data_file.spec_id,
- "record_count": data_file.record_count,
- "file_size_in_bytes": data_file.file_size_in_bytes,
- "column_sizes": dict(data_file.column_sizes) if data_file.column_sizes is not None else None,
- "value_counts": dict(data_file.value_counts) if data_file.value_counts is not None else None,
- "null_value_counts": dict(data_file.null_value_counts) if data_file.null_value_counts is not None else None,
- "nan_value_counts": dict(data_file.nan_value_counts) if data_file.nan_value_counts is not None else None,
- "lower_bounds": dict(data_file.lower_bounds) if data_file.lower_bounds is not None else None,
- "upper_bounds": dict(data_file.upper_bounds) if data_file.upper_bounds is not None else None,
- "key_metadata": data_file.key_metadata,
- "split_offsets": data_file.split_offsets,
- "equality_ids": data_file.equality_ids,
- "sort_order_id": data_file.sort_order_id,
- "readable_metrics": readable_metrics,
- })
+ files.append(
+ {
+ "content": data_file.content,
+ "file_path": data_file.file_path,
+ "file_format": data_file.file_format,
+ "spec_id": data_file.spec_id,
+ "record_count": data_file.record_count,
+ "file_size_in_bytes": data_file.file_size_in_bytes,
+ "column_sizes": dict(data_file.column_sizes) if data_file.column_sizes is not None else None,
+ "value_counts": dict(data_file.value_counts) if data_file.value_counts is not None else None,
+ "null_value_counts": dict(data_file.null_value_counts)
+ if data_file.null_value_counts is not None
+ else None,
+ "nan_value_counts": dict(data_file.nan_value_counts) if data_file.nan_value_counts is not None else None,
+ "lower_bounds": dict(data_file.lower_bounds) if data_file.lower_bounds is not None else None,
+ "upper_bounds": dict(data_file.upper_bounds) if data_file.upper_bounds is not None else None,
+ "key_metadata": data_file.key_metadata,
+ "split_offsets": data_file.split_offsets,
+ "equality_ids": data_file.equality_ids,
+ "sort_order_id": data_file.sort_order_id,
+ "readable_metrics": readable_metrics,
+ }
+ )
return pa.Table.from_pylist(
files,
@@ -586,3 +644,16 @@ def data_files(self, snapshot_id: Optional[int] = None) -> "pa.Table":
def delete_files(self, snapshot_id: Optional[int] = None) -> "pa.Table":
return self._files(snapshot_id, {DataFileContent.POSITION_DELETES, DataFileContent.EQUALITY_DELETES})
+
+ def all_manifests(self) -> "pa.Table":
+ import pyarrow as pa
+
+ snapshots = self.tbl.snapshots()
+ if not snapshots:
+ return pa.Table.from_pylist([], schema=self._get_all_manifests_schema())
+
+ executor = ExecutorFactory.get_or_create()
+ manifests_by_snapshots: Iterator["pa.Table"] = executor.map(
+ lambda args: self._generate_manifests_table(*args), [(snapshot, True) for snapshot in snapshots]
+ )
+ return pa.concat_tables(manifests_by_snapshots)
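
Usage-wise, the new metadata table complements `inspect.manifests()`: it unions the manifests of every snapshot (computed in parallel via the executor) and tags each row with the snapshot that references it. A small sketch with hypothetical catalog and table identifiers:

```python
from pyiceberg.catalog import load_catalog

catalog = load_catalog("default")               # hypothetical catalog name
table = catalog.load_table("default.taxis")     # hypothetical table identifier
all_manifests = table.inspect.all_manifests()   # returns a pyarrow.Table
print(all_manifests.column("reference_snapshot_id"))
```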
diff --git a/pyiceberg/table/locations.py b/pyiceberg/table/locations.py
new file mode 100644
index 0000000000..046ee32527
--- /dev/null
+++ b/pyiceberg/table/locations.py
@@ -0,0 +1,145 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import importlib
+import logging
+from abc import ABC, abstractmethod
+from typing import Optional
+
+import mmh3
+
+from pyiceberg.partitioning import PartitionKey
+from pyiceberg.table import TableProperties
+from pyiceberg.typedef import Properties
+from pyiceberg.utils.properties import property_as_bool
+
+logger = logging.getLogger(__name__)
+
+
+class LocationProvider(ABC):
+ """A base class for location providers, that provide data file locations for write tasks."""
+
+ table_location: str
+ table_properties: Properties
+
+ def __init__(self, table_location: str, table_properties: Properties):
+ self.table_location = table_location
+ self.table_properties = table_properties
+
+ @abstractmethod
+ def new_data_location(self, data_file_name: str, partition_key: Optional[PartitionKey] = None) -> str:
+ """Return a fully-qualified data file location for the given filename.
+
+ Args:
+ data_file_name (str): The name of the data file.
+ partition_key (Optional[PartitionKey]): The data file's partition key. If None, the data is not partitioned.
+
+ Returns:
+ str: A fully-qualified location URI for the data file.
+ """
+
+
+class SimpleLocationProvider(LocationProvider):
+ def __init__(self, table_location: str, table_properties: Properties):
+ super().__init__(table_location, table_properties)
+
+ def new_data_location(self, data_file_name: str, partition_key: Optional[PartitionKey] = None) -> str:
+ prefix = f"{self.table_location}/data"
+ return f"{prefix}/{partition_key.to_path()}/{data_file_name}" if partition_key else f"{prefix}/{data_file_name}"
+
+
+class ObjectStoreLocationProvider(LocationProvider):
+ HASH_BINARY_STRING_BITS = 20
+ ENTROPY_DIR_LENGTH = 4
+ ENTROPY_DIR_DEPTH = 3
+
+ _include_partition_paths: bool
+
+ def __init__(self, table_location: str, table_properties: Properties):
+ super().__init__(table_location, table_properties)
+ self._include_partition_paths = property_as_bool(
+ self.table_properties,
+ TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS,
+ TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT,
+ )
+
+ def new_data_location(self, data_file_name: str, partition_key: Optional[PartitionKey] = None) -> str:
+ if self._include_partition_paths and partition_key:
+ return self.new_data_location(f"{partition_key.to_path()}/{data_file_name}")
+
+ prefix = f"{self.table_location}/data"
+ hashed_path = self._compute_hash(data_file_name)
+
+ return (
+ f"{prefix}/{hashed_path}/{data_file_name}"
+ if self._include_partition_paths
+ else f"{prefix}/{hashed_path}-{data_file_name}"
+ )
+
+ @staticmethod
+ def _compute_hash(data_file_name: str) -> str:
+ # Bitwise AND to combat sign-extension; bitwise OR to preserve leading zeroes that `bin` would otherwise strip.
+ top_mask = 1 << ObjectStoreLocationProvider.HASH_BINARY_STRING_BITS
+ hash_code = mmh3.hash(data_file_name) & (top_mask - 1) | top_mask
+ return ObjectStoreLocationProvider._dirs_from_hash(bin(hash_code)[-ObjectStoreLocationProvider.HASH_BINARY_STRING_BITS :])
+
+ @staticmethod
+ def _dirs_from_hash(file_hash: str) -> str:
+ """Divides hash into directories for optimized orphan removal operation using ENTROPY_DIR_DEPTH and ENTROPY_DIR_LENGTH."""
+ total_entropy_length = ObjectStoreLocationProvider.ENTROPY_DIR_DEPTH * ObjectStoreLocationProvider.ENTROPY_DIR_LENGTH
+
+ hash_with_dirs = []
+ for i in range(0, total_entropy_length, ObjectStoreLocationProvider.ENTROPY_DIR_LENGTH):
+ hash_with_dirs.append(file_hash[i : i + ObjectStoreLocationProvider.ENTROPY_DIR_LENGTH])
+
+ if len(file_hash) > total_entropy_length:
+ hash_with_dirs.append(file_hash[total_entropy_length:])
+
+ return "/".join(hash_with_dirs)
+
+
+def _import_location_provider(
+ location_provider_impl: str, table_location: str, table_properties: Properties
+) -> Optional[LocationProvider]:
+ try:
+ path_parts = location_provider_impl.split(".")
+ if len(path_parts) < 2:
+ raise ValueError(
+ f"{TableProperties.WRITE_PY_LOCATION_PROVIDER_IMPL} should be full path (module.CustomLocationProvider), got: {location_provider_impl}"
+ )
+ module_name, class_name = ".".join(path_parts[:-1]), path_parts[-1]
+ module = importlib.import_module(module_name)
+ class_ = getattr(module, class_name)
+ return class_(table_location, table_properties)
+ except ModuleNotFoundError:
+ logger.warning("Could not initialize LocationProvider: %s", location_provider_impl)
+ return None
+
+
+def load_location_provider(table_location: str, table_properties: Properties) -> LocationProvider:
+ table_location = table_location.rstrip("/")
+
+ if location_provider_impl := table_properties.get(TableProperties.WRITE_PY_LOCATION_PROVIDER_IMPL):
+ if location_provider := _import_location_provider(location_provider_impl, table_location, table_properties):
+ logger.info("Loaded LocationProvider: %s", location_provider_impl)
+ return location_provider
+ else:
+ raise ValueError(f"Could not initialize LocationProvider: {location_provider_impl}")
+
+ if property_as_bool(table_properties, TableProperties.OBJECT_STORE_ENABLED, TableProperties.OBJECT_STORE_ENABLED_DEFAULT):
+ return ObjectStoreLocationProvider(table_location, table_properties)
+ else:
+ return SimpleLocationProvider(table_location, table_properties)
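
Beyond the two built-in providers, `write.py-location-provider.impl` is the hook for plugging in a custom strategy. A minimal sketch against the interface added above; the module and class names are invented:

```python
from typing import Optional

from pyiceberg.partitioning import PartitionKey
from pyiceberg.table.locations import LocationProvider


class CustomLocationProvider(LocationProvider):
    """Write data files under '<table location>/custom' instead of '/data'."""

    def new_data_location(self, data_file_name: str, partition_key: Optional[PartitionKey] = None) -> str:
        prefix = f"{self.table_location}/custom"
        return f"{prefix}/{partition_key.to_path()}/{data_file_name}" if partition_key else f"{prefix}/{data_file_name}"


# Selected per table with, e.g.:
#   write.py-location-provider.impl = "my_module.CustomLocationProvider"
```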
diff --git a/pyiceberg/utils/decimal.py b/pyiceberg/utils/decimal.py
index 4432564dd1..99638d2a00 100644
--- a/pyiceberg/utils/decimal.py
+++ b/pyiceberg/utils/decimal.py
@@ -85,7 +85,7 @@ def bytes_to_decimal(value: bytes, scale: int) -> Decimal:
"""Return a decimal from the bytes.
Args:
- value (bytes): tbe bytes to be converted into a decimal.
+ value (bytes): the bytes to be converted into a decimal.
scale (int): the scale of the decimal.
Returns:
diff --git a/pyiceberg/utils/deprecated.py b/pyiceberg/utils/deprecated.py
index da2cb3b500..b196f47ec6 100644
--- a/pyiceberg/utils/deprecated.py
+++ b/pyiceberg/utils/deprecated.py
@@ -56,7 +56,6 @@ def deprecation_message(deprecated_in: str, removed_in: str, help_message: Optio
def _deprecation_warning(message: str) -> None:
with warnings.catch_warnings(): # temporarily override warning handling
- warnings.simplefilter("always", DeprecationWarning) # turn off filter
warnings.warn(
message,
category=DeprecationWarning,
diff --git a/pyiceberg/utils/singleton.py b/pyiceberg/utils/singleton.py
index 8a4bbf91ce..06ee62febe 100644
--- a/pyiceberg/utils/singleton.py
+++ b/pyiceberg/utils/singleton.py
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
"""
-This is a singleton metaclass that can be used to cache and re-use existing objects.
+This is a singleton metaclass that can be used to cache and reuse existing objects.
In the Iceberg codebase we have a lot of objects that are stateless (for example Types such as StringType,
BooleanType etc). FixedTypes have arguments (eg. Fixed[22]) that we also make part of the key when caching
diff --git a/pyproject.toml b/pyproject.toml
index 715388c290..5d2808db94 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,7 @@
# under the License.
[tool.poetry]
name = "pyiceberg"
-version = "0.8.0"
+version = "0.9.0"
readme = "README.md"
homepage = "https://py.iceberg.apache.org/"
repository = "https://github.com/apache/iceberg-python"
@@ -83,7 +83,7 @@ pyiceberg-core = { version = "^0.4.0", optional = true }
[tool.poetry.group.dev.dependencies]
pytest = "7.4.4"
-pytest-checkdocs = "2.10.1"
+pytest-checkdocs = "2.13.0"
pytest-lazy-fixture = "0.6.3"
pre-commit = "4.0.1"
fastavro = "1.10.0"
@@ -94,8 +94,23 @@ typing-extensions = "4.12.2"
pytest-mock = "3.14.0"
pyspark = "3.5.3"
cython = "3.0.11"
-deptry = ">=0.14,<0.22"
+deptry = ">=0.14,<0.23"
docutils = "!=0.21.post1" # https://github.com/python-poetry/poetry/issues/9248#issuecomment-2026240520
+
+[tool.poetry.group.docs.dependencies]
+# for mkdocs
+mkdocs = "1.6.1"
+griffe = "1.5.4"
+jinja2 = "3.1.5"
+mkdocstrings = "0.27.0"
+mkdocstrings-python = "1.13.0"
+mkdocs-literate-nav = "0.6.1"
+mkdocs-autorefs = "1.3.0"
+mkdocs-gen-files = "0.5.0"
+mkdocs-material = "9.5.49"
+mkdocs-material-extensions = "1.3.1"
+mkdocs-section-index = "0.3.9"
+
[[tool.mypy.overrides]]
module = "pytest_mock.*"
ignore_missing_imports = true
@@ -864,6 +879,158 @@ ignore_missing_imports = true
module = "tenacity.*"
ignore_missing_imports = true
+[[tool.mypy.overrides]]
+module = "pyarrow.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "pandas.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "snappy.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "zstandard.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "pydantic.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "pydantic_core.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "pytest.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "fastavro.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "mmh3.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "hive_metastore.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "thrift.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "requests_mock.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "click.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "rich.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "fsspec.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "s3fs.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "azure.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "adlfs.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "gcsfs.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "packaging.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "tests.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "boto3"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "botocore.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "mypy_boto3_glue.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "moto"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "aiobotocore.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "aiohttp.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "duckdb.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "ray.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "daft.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "pyparsing.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "pyspark.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "strictyaml.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "sortedcontainers.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "sqlalchemy.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "Cython.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "setuptools.*"
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "tenacity.*"
+ignore_missing_imports = true
+
[tool.poetry.scripts]
pyiceberg = "pyiceberg.cli.console:run"
diff --git a/ruff.toml b/ruff.toml
index caaa108c84..11fd2a957b 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -58,7 +58,7 @@ select = [
"I", # isort
"UP", # pyupgrade
]
-ignore = ["E501","E203","B024","B028","UP037"]
+ignore = ["E501","E203","B024","B028","UP037", "UP035", "UP006"]
# Allow autofix for all enabled rules (when `--fix`) is provided.
fixable = ["ALL"]
diff --git a/tests/avro/test_resolver.py b/tests/avro/test_resolver.py
index decd9060a4..b5388b5ebb 100644
--- a/tests/avro/test_resolver.py
+++ b/tests/avro/test_resolver.py
@@ -322,30 +322,34 @@ def test_resolver_initial_value() -> None:
def test_resolve_writer() -> None:
actual = resolve_writer(record_schema=MANIFEST_ENTRY_SCHEMAS[2], file_schema=MANIFEST_ENTRY_SCHEMAS[1])
- expected = StructWriter((
- (0, IntegerWriter()),
- (1, IntegerWriter()),
+ expected = StructWriter(
(
- 4,
- StructWriter((
- (1, StringWriter()),
- (2, StringWriter()),
- (3, StructWriter(())),
- (4, IntegerWriter()),
- (5, IntegerWriter()),
- (None, DefaultWriter(writer=IntegerWriter(), value=67108864)),
- (6, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))),
- (7, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))),
- (8, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))),
- (9, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))),
- (10, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=BinaryWriter()))),
- (11, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=BinaryWriter()))),
- (12, OptionWriter(option=BinaryWriter())),
- (13, OptionWriter(option=ListWriter(element_writer=IntegerWriter()))),
- (15, OptionWriter(option=IntegerWriter())),
- )),
- ),
- ))
+ (0, IntegerWriter()),
+ (1, IntegerWriter()),
+ (
+ 4,
+ StructWriter(
+ (
+ (1, StringWriter()),
+ (2, StringWriter()),
+ (3, StructWriter(())),
+ (4, IntegerWriter()),
+ (5, IntegerWriter()),
+ (None, DefaultWriter(writer=IntegerWriter(), value=67108864)),
+ (6, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))),
+ (7, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))),
+ (8, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))),
+ (9, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=IntegerWriter()))),
+ (10, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=BinaryWriter()))),
+ (11, OptionWriter(option=MapWriter(key_writer=IntegerWriter(), value_writer=BinaryWriter()))),
+ (12, OptionWriter(option=BinaryWriter())),
+ (13, OptionWriter(option=ListWriter(element_writer=IntegerWriter()))),
+ (15, OptionWriter(option=IntegerWriter())),
+ )
+ ),
+ ),
+ )
+ )
assert actual == expected
diff --git a/tests/avro/test_writer.py b/tests/avro/test_writer.py
index 5a531c7748..39b8ecc393 100644
--- a/tests/avro/test_writer.py
+++ b/tests/avro/test_writer.py
@@ -178,15 +178,17 @@ class MyStruct(Record):
construct_writer(schema).write(encoder, my_struct)
- assert output.getbuffer() == b"".join([
- b"\x18",
- zigzag_encode(len(my_struct.properties)),
- zigzag_encode(1),
- zigzag_encode(2),
- zigzag_encode(3),
- zigzag_encode(4),
- b"\x00",
- ])
+ assert output.getbuffer() == b"".join(
+ [
+ b"\x18",
+ zigzag_encode(len(my_struct.properties)),
+ zigzag_encode(1),
+ zigzag_encode(2),
+ zigzag_encode(3),
+ zigzag_encode(4),
+ b"\x00",
+ ]
+ )
def test_write_struct_with_list() -> None:
@@ -206,15 +208,17 @@ class MyStruct(Record):
construct_writer(schema).write(encoder, my_struct)
- assert output.getbuffer() == b"".join([
- b"\x18",
- zigzag_encode(len(my_struct.properties)),
- zigzag_encode(1),
- zigzag_encode(2),
- zigzag_encode(3),
- zigzag_encode(4),
- b"\x00",
- ])
+ assert output.getbuffer() == b"".join(
+ [
+ b"\x18",
+ zigzag_encode(len(my_struct.properties)),
+ zigzag_encode(1),
+ zigzag_encode(2),
+ zigzag_encode(3),
+ zigzag_encode(4),
+ b"\x00",
+ ]
+ )
def test_write_decimal() -> None:
diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py
index 091a67166b..21aa9677bd 100644
--- a/tests/catalog/test_rest.py
+++ b/tests/catalog/test_rest.py
@@ -121,6 +121,9 @@ def test_no_uri_supplied() -> None:
RestCatalog("production")
+@pytest.mark.filterwarnings(
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
+)
def test_token_200(rest_mock: Mocker) -> None:
rest_mock.post(
f"{TEST_URI}v1/oauth/tokens",
@@ -141,6 +144,9 @@ def test_token_200(rest_mock: Mocker) -> None:
)
+@pytest.mark.filterwarnings(
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
+)
def test_token_200_without_optional_fields(rest_mock: Mocker) -> None:
rest_mock.post(
f"{TEST_URI}v1/oauth/tokens",
@@ -157,6 +163,9 @@ def test_token_200_without_optional_fields(rest_mock: Mocker) -> None:
)
+@pytest.mark.filterwarnings(
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
+)
def test_token_with_optional_oauth_params(rest_mock: Mocker) -> None:
mock_request = rest_mock.post(
f"{TEST_URI}v1/oauth/tokens",
@@ -179,6 +188,9 @@ def test_token_with_optional_oauth_params(rest_mock: Mocker) -> None:
assert TEST_RESOURCE in mock_request.last_request.text
+@pytest.mark.filterwarnings(
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
+)
def test_token_with_optional_oauth_params_as_empty(rest_mock: Mocker) -> None:
mock_request = rest_mock.post(
f"{TEST_URI}v1/oauth/tokens",
@@ -199,6 +211,9 @@ def test_token_with_optional_oauth_params_as_empty(rest_mock: Mocker) -> None:
assert TEST_RESOURCE not in mock_request.last_request.text
+@pytest.mark.filterwarnings(
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
+)
def test_token_with_default_scope(rest_mock: Mocker) -> None:
mock_request = rest_mock.post(
f"{TEST_URI}v1/oauth/tokens",
@@ -217,6 +232,9 @@ def test_token_with_default_scope(rest_mock: Mocker) -> None:
assert "catalog" in mock_request.last_request.text
+@pytest.mark.filterwarnings(
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
+)
def test_token_with_custom_scope(rest_mock: Mocker) -> None:
mock_request = rest_mock.post(
f"{TEST_URI}v1/oauth/tokens",
@@ -236,6 +254,9 @@ def test_token_with_custom_scope(rest_mock: Mocker) -> None:
assert TEST_SCOPE in mock_request.last_request.text
+@pytest.mark.filterwarnings(
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
+)
def test_token_200_w_auth_url(rest_mock: Mocker) -> None:
rest_mock.post(
TEST_AUTH_URL,
@@ -258,6 +279,9 @@ def test_token_200_w_auth_url(rest_mock: Mocker) -> None:
# pylint: enable=W0212
+@pytest.mark.filterwarnings(
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
+)
def test_config_200(requests_mock: Mocker) -> None:
requests_mock.get(
f"{TEST_URI}v1/config",
@@ -299,19 +323,19 @@ def test_properties_sets_headers(requests_mock: Mocker) -> None:
**{"header.Content-Type": "application/vnd.api+json", "header.Customized-Header": "some/value"},
)
- assert catalog._session.headers.get("Content-type") == "application/json", (
- "Expected 'Content-Type' default header not to be overwritten"
- )
- assert requests_mock.last_request.headers["Content-type"] == "application/json", (
- "Config request did not include expected 'Content-Type' header"
- )
+ assert (
+ catalog._session.headers.get("Content-type") == "application/json"
+ ), "Expected 'Content-Type' default header not to be overwritten"
+ assert (
+ requests_mock.last_request.headers["Content-type"] == "application/json"
+ ), "Config request did not include expected 'Content-Type' header"
- assert catalog._session.headers.get("Customized-Header") == "some/value", (
- "Expected 'Customized-Header' header to be 'some/value'"
- )
- assert requests_mock.last_request.headers["Customized-Header"] == "some/value", (
- "Config request did not include expected 'Customized-Header' header"
- )
+ assert (
+ catalog._session.headers.get("Customized-Header") == "some/value"
+ ), "Expected 'Customized-Header' header to be 'some/value'"
+ assert (
+ requests_mock.last_request.headers["Customized-Header"] == "some/value"
+ ), "Config request did not include expected 'Customized-Header' header"
def test_config_sets_headers(requests_mock: Mocker) -> None:
@@ -328,21 +352,24 @@ def test_config_sets_headers(requests_mock: Mocker) -> None:
catalog = RestCatalog("rest", uri=TEST_URI, warehouse="s3://some-bucket")
catalog.create_namespace(namespace)
- assert catalog._session.headers.get("Content-type") == "application/json", (
- "Expected 'Content-Type' default header not to be overwritten"
- )
- assert requests_mock.last_request.headers["Content-type"] == "application/json", (
- "Create namespace request did not include expected 'Content-Type' header"
- )
+ assert (
+ catalog._session.headers.get("Content-type") == "application/json"
+ ), "Expected 'Content-Type' default header not to be overwritten"
+ assert (
+ requests_mock.last_request.headers["Content-type"] == "application/json"
+ ), "Create namespace request did not include expected 'Content-Type' header"
- assert catalog._session.headers.get("Customized-Header") == "some/value", (
- "Expected 'Customized-Header' header to be 'some/value'"
- )
- assert requests_mock.last_request.headers["Customized-Header"] == "some/value", (
- "Create namespace request did not include expected 'Customized-Header' header"
- )
+ assert (
+ catalog._session.headers.get("Customized-Header") == "some/value"
+ ), "Expected 'Customized-Header' header to be 'some/value'"
+ assert (
+ requests_mock.last_request.headers["Customized-Header"] == "some/value"
+ ), "Create namespace request did not include expected 'Customized-Header' header"
+@pytest.mark.filterwarnings(
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
+)
def test_token_400(rest_mock: Mocker) -> None:
rest_mock.post(
f"{TEST_URI}v1/oauth/tokens",
@@ -356,6 +383,9 @@ def test_token_400(rest_mock: Mocker) -> None:
assert str(e.value) == "invalid_client: Credentials for key invalid_key do not match"
+@pytest.mark.filterwarnings(
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
+)
def test_token_401(rest_mock: Mocker) -> None:
message = "invalid_client"
rest_mock.post(
@@ -489,6 +519,9 @@ def test_list_namespace_with_parent_200(rest_mock: Mocker) -> None:
]
+@pytest.mark.filterwarnings(
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
+)
def test_list_namespaces_token_expired(rest_mock: Mocker) -> None:
new_token = "new_jwt_token"
new_header = dict(TEST_HEADERS)
diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py
index 7f72568b41..cffc14d9d7 100644
--- a/tests/catalog/test_sql.py
+++ b/tests/catalog/test_sql.py
@@ -401,12 +401,14 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, table_identifier: Identifier)
pa.array([True, None, False, True]), # 'baz' column
pa.array([None, "A", "B", "C"]), # 'large' column
],
- schema=pa.schema([
- pa.field("foo", pa.large_string(), nullable=True),
- pa.field("bar", pa.int32(), nullable=False),
- pa.field("baz", pa.bool_(), nullable=True),
- pa.field("large", pa.large_string(), nullable=True),
- ]),
+ schema=pa.schema(
+ [
+ pa.field("foo", pa.large_string(), nullable=True),
+ pa.field("bar", pa.int32(), nullable=False),
+ pa.field("baz", pa.bool_(), nullable=True),
+ pa.field("large", pa.large_string(), nullable=True),
+ ]
+ ),
)
namespace = Catalog.namespace_from(table_identifier)
catalog.create_namespace(namespace)
@@ -1426,10 +1428,12 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None:
"foo": ["a", None, "z"],
"bar": [19, None, 25],
},
- schema=pa.schema([
- pa.field("foo", pa.large_string(), nullable=True),
- pa.field("bar", pa.int32(), nullable=True),
- ]),
+ schema=pa.schema(
+ [
+ pa.field("foo", pa.large_string(), nullable=True),
+ pa.field("bar", pa.int32(), nullable=True),
+ ]
+ ),
)
with tbl.transaction() as txn:
@@ -1474,10 +1478,12 @@ def test_create_table_transaction(catalog: SqlCatalog, format_version: int) -> N
"foo": ["a", None, "z"],
"bar": [19, None, 25],
},
- schema=pa.schema([
- pa.field("foo", pa.large_string(), nullable=True),
- pa.field("bar", pa.int32(), nullable=True),
- ]),
+ schema=pa.schema(
+ [
+ pa.field("foo", pa.large_string(), nullable=True),
+ pa.field("bar", pa.int32(), nullable=True),
+ ]
+ ),
)
with catalog.create_table_transaction(
diff --git a/tests/conftest.py b/tests/conftest.py
index 22329b3882..ef980f3818 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -353,49 +353,57 @@ def table_schema_with_all_types() -> Schema:
def pyarrow_schema_simple_without_ids() -> "pa.Schema":
import pyarrow as pa
- return pa.schema([
- pa.field("foo", pa.string(), nullable=True),
- pa.field("bar", pa.int32(), nullable=False),
- pa.field("baz", pa.bool_(), nullable=True),
- ])
+ return pa.schema(
+ [
+ pa.field("foo", pa.string(), nullable=True),
+ pa.field("bar", pa.int32(), nullable=False),
+ pa.field("baz", pa.bool_(), nullable=True),
+ ]
+ )
@pytest.fixture(scope="session")
def pyarrow_schema_nested_without_ids() -> "pa.Schema":
import pyarrow as pa
- return pa.schema([
- pa.field("foo", pa.string(), nullable=False),
- pa.field("bar", pa.int32(), nullable=False),
- pa.field("baz", pa.bool_(), nullable=True),
- pa.field("qux", pa.list_(pa.string()), nullable=False),
- pa.field(
- "quux",
- pa.map_(
- pa.string(),
- pa.map_(pa.string(), pa.int32()),
+ return pa.schema(
+ [
+ pa.field("foo", pa.string(), nullable=False),
+ pa.field("bar", pa.int32(), nullable=False),
+ pa.field("baz", pa.bool_(), nullable=True),
+ pa.field("qux", pa.list_(pa.string()), nullable=False),
+ pa.field(
+ "quux",
+ pa.map_(
+ pa.string(),
+ pa.map_(pa.string(), pa.int32()),
+ ),
+ nullable=False,
),
- nullable=False,
- ),
- pa.field(
- "location",
- pa.list_(
- pa.struct([
- pa.field("latitude", pa.float32(), nullable=False),
- pa.field("longitude", pa.float32(), nullable=False),
- ]),
+ pa.field(
+ "location",
+ pa.list_(
+ pa.struct(
+ [
+ pa.field("latitude", pa.float32(), nullable=False),
+ pa.field("longitude", pa.float32(), nullable=False),
+ ]
+ ),
+ ),
+ nullable=False,
),
- nullable=False,
- ),
- pa.field(
- "person",
- pa.struct([
- pa.field("name", pa.string(), nullable=True),
- pa.field("age", pa.int32(), nullable=False),
- ]),
- nullable=True,
- ),
- ])
+ pa.field(
+ "person",
+ pa.struct(
+ [
+ pa.field("name", pa.string(), nullable=True),
+ pa.field("age", pa.int32(), nullable=False),
+ ]
+ ),
+ nullable=True,
+ ),
+ ]
+ )
@pytest.fixture(scope="session")
@@ -2314,26 +2322,28 @@ def spark() -> "SparkSession":
def pa_schema() -> "pa.Schema":
import pyarrow as pa
- return pa.schema([
- ("bool", pa.bool_()),
- ("string", pa.large_string()),
- ("string_long", pa.large_string()),
- ("int", pa.int32()),
- ("long", pa.int64()),
- ("float", pa.float32()),
- ("double", pa.float64()),
- # Not supported by Spark
- # ("time", pa.time64('us')),
- ("timestamp", pa.timestamp(unit="us")),
- ("timestamptz", pa.timestamp(unit="us", tz="UTC")),
- ("date", pa.date32()),
- # Not supported by Spark
- # ("time", pa.time64("us")),
- # Not natively supported by Arrow
- # ("uuid", pa.fixed(16)),
- ("binary", pa.large_binary()),
- ("fixed", pa.binary(16)),
- ])
+ return pa.schema(
+ [
+ ("bool", pa.bool_()),
+ ("string", pa.large_string()),
+ ("string_long", pa.large_string()),
+ ("int", pa.int32()),
+ ("long", pa.int64()),
+ ("float", pa.float32()),
+ ("double", pa.float64()),
+ # Not supported by Spark
+ # ("time", pa.time64('us')),
+ ("timestamp", pa.timestamp(unit="us")),
+ ("timestamptz", pa.timestamp(unit="us", tz="UTC")),
+ ("date", pa.date32()),
+ # Not supported by Spark
+ # ("time", pa.time64("us")),
+ # Not natively supported by Arrow
+ # ("uuid", pa.fixed(16)),
+ ("binary", pa.large_binary()),
+ ("fixed", pa.binary(16)),
+ ]
+ )
@pytest.fixture(scope="session")
@@ -2415,11 +2425,13 @@ def arrow_table_date_timestamps() -> "pa.Table":
None,
],
},
- schema=pa.schema([
- ("date", pa.date32()),
- ("timestamp", pa.timestamp(unit="us")),
- ("timestamptz", pa.timestamp(unit="us", tz="UTC")),
- ]),
+ schema=pa.schema(
+ [
+ ("date", pa.date32()),
+ ("timestamp", pa.timestamp(unit="us")),
+ ("timestamptz", pa.timestamp(unit="us", tz="UTC")),
+ ]
+ ),
)
@@ -2438,19 +2450,21 @@ def arrow_table_schema_with_all_timestamp_precisions() -> "pa.Schema":
"""Pyarrow Schema with all supported timestamp types."""
import pyarrow as pa
- return pa.schema([
- ("timestamp_s", pa.timestamp(unit="s")),
- ("timestamptz_s", pa.timestamp(unit="s", tz="UTC")),
- ("timestamp_ms", pa.timestamp(unit="ms")),
- ("timestamptz_ms", pa.timestamp(unit="ms", tz="UTC")),
- ("timestamp_us", pa.timestamp(unit="us")),
- ("timestamptz_us", pa.timestamp(unit="us", tz="UTC")),
- ("timestamp_ns", pa.timestamp(unit="ns")),
- ("timestamptz_ns", pa.timestamp(unit="ns", tz="UTC")),
- ("timestamptz_us_etc_utc", pa.timestamp(unit="us", tz="Etc/UTC")),
- ("timestamptz_ns_z", pa.timestamp(unit="ns", tz="Z")),
- ("timestamptz_s_0000", pa.timestamp(unit="s", tz="+00:00")),
- ])
+ return pa.schema(
+ [
+ ("timestamp_s", pa.timestamp(unit="s")),
+ ("timestamptz_s", pa.timestamp(unit="s", tz="UTC")),
+ ("timestamp_ms", pa.timestamp(unit="ms")),
+ ("timestamptz_ms", pa.timestamp(unit="ms", tz="UTC")),
+ ("timestamp_us", pa.timestamp(unit="us")),
+ ("timestamptz_us", pa.timestamp(unit="us", tz="UTC")),
+ ("timestamp_ns", pa.timestamp(unit="ns")),
+ ("timestamptz_ns", pa.timestamp(unit="ns", tz="UTC")),
+ ("timestamptz_us_etc_utc", pa.timestamp(unit="us", tz="Etc/UTC")),
+ ("timestamptz_ns_z", pa.timestamp(unit="ns", tz="Z")),
+ ("timestamptz_s_0000", pa.timestamp(unit="s", tz="+00:00")),
+ ]
+ )
@pytest.fixture(scope="session")
@@ -2459,51 +2473,53 @@ def arrow_table_with_all_timestamp_precisions(arrow_table_schema_with_all_timest
import pandas as pd
import pyarrow as pa
- test_data = pd.DataFrame({
- "timestamp_s": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)],
- "timestamptz_s": [
- datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc),
- None,
- datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc),
- ],
- "timestamp_ms": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)],
- "timestamptz_ms": [
- datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc),
- None,
- datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc),
- ],
- "timestamp_us": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)],
- "timestamptz_us": [
- datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc),
- None,
- datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc),
- ],
- "timestamp_ns": [
- pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=6),
- None,
- pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=7),
- ],
- "timestamptz_ns": [
- datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc),
- None,
- datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc),
- ],
- "timestamptz_us_etc_utc": [
- datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc),
- None,
- datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc),
- ],
- "timestamptz_ns_z": [
- pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=6, tz="UTC"),
- None,
- pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=7, tz="UTC"),
- ],
- "timestamptz_s_0000": [
- datetime(2023, 1, 1, 19, 25, 1, tzinfo=timezone.utc),
- None,
- datetime(2023, 3, 1, 19, 25, 1, tzinfo=timezone.utc),
- ],
- })
+ test_data = pd.DataFrame(
+ {
+ "timestamp_s": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)],
+ "timestamptz_s": [
+ datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc),
+ None,
+ datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc),
+ ],
+ "timestamp_ms": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)],
+ "timestamptz_ms": [
+ datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc),
+ None,
+ datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc),
+ ],
+ "timestamp_us": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)],
+ "timestamptz_us": [
+ datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc),
+ None,
+ datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc),
+ ],
+ "timestamp_ns": [
+ pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=6),
+ None,
+ pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=7),
+ ],
+ "timestamptz_ns": [
+ datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc),
+ None,
+ datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc),
+ ],
+ "timestamptz_us_etc_utc": [
+ datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc),
+ None,
+ datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc),
+ ],
+ "timestamptz_ns_z": [
+ pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=6, tz="UTC"),
+ None,
+ pd.Timestamp(year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=7, tz="UTC"),
+ ],
+ "timestamptz_s_0000": [
+ datetime(2023, 1, 1, 19, 25, 1, tzinfo=timezone.utc),
+ None,
+ datetime(2023, 3, 1, 19, 25, 1, tzinfo=timezone.utc),
+ ],
+ }
+ )
return pa.Table.from_pandas(test_data, schema=arrow_table_schema_with_all_timestamp_precisions)
@@ -2512,19 +2528,21 @@ def arrow_table_schema_with_all_microseconds_timestamp_precisions() -> "pa.Schem
"""Pyarrow Schema with all microseconds timestamp."""
import pyarrow as pa
- return pa.schema([
- ("timestamp_s", pa.timestamp(unit="us")),
- ("timestamptz_s", pa.timestamp(unit="us", tz="UTC")),
- ("timestamp_ms", pa.timestamp(unit="us")),
- ("timestamptz_ms", pa.timestamp(unit="us", tz="UTC")),
- ("timestamp_us", pa.timestamp(unit="us")),
- ("timestamptz_us", pa.timestamp(unit="us", tz="UTC")),
- ("timestamp_ns", pa.timestamp(unit="us")),
- ("timestamptz_ns", pa.timestamp(unit="us", tz="UTC")),
- ("timestamptz_us_etc_utc", pa.timestamp(unit="us", tz="UTC")),
- ("timestamptz_ns_z", pa.timestamp(unit="us", tz="UTC")),
- ("timestamptz_s_0000", pa.timestamp(unit="us", tz="UTC")),
- ])
+ return pa.schema(
+ [
+ ("timestamp_s", pa.timestamp(unit="us")),
+ ("timestamptz_s", pa.timestamp(unit="us", tz="UTC")),
+ ("timestamp_ms", pa.timestamp(unit="us")),
+ ("timestamptz_ms", pa.timestamp(unit="us", tz="UTC")),
+ ("timestamp_us", pa.timestamp(unit="us")),
+ ("timestamptz_us", pa.timestamp(unit="us", tz="UTC")),
+ ("timestamp_ns", pa.timestamp(unit="us")),
+ ("timestamptz_ns", pa.timestamp(unit="us", tz="UTC")),
+ ("timestamptz_us_etc_utc", pa.timestamp(unit="us", tz="UTC")),
+ ("timestamptz_ns_z", pa.timestamp(unit="us", tz="UTC")),
+ ("timestamptz_s_0000", pa.timestamp(unit="us", tz="UTC")),
+ ]
+ )
@pytest.fixture(scope="session")
@@ -2578,13 +2596,15 @@ def pyarrow_schema_with_promoted_types() -> "pa.Schema":
"""Pyarrow Schema with longs, doubles and uuid in simple and nested types."""
import pyarrow as pa
- return pa.schema((
- pa.field("long", pa.int32(), nullable=True), # can support upcasting integer to long
- pa.field("list", pa.list_(pa.int32()), nullable=False), # can support upcasting integer to long
- pa.field("map", pa.map_(pa.string(), pa.int32()), nullable=False), # can support upcasting integer to long
- pa.field("double", pa.float32(), nullable=True), # can support upcasting float to double
- pa.field("uuid", pa.binary(length=16), nullable=True), # can support upcasting float to double
- ))
+ return pa.schema(
+ (
+ pa.field("long", pa.int32(), nullable=True), # can support upcasting integer to long
+ pa.field("list", pa.list_(pa.int32()), nullable=False), # can support upcasting integer to long
+ pa.field("map", pa.map_(pa.string(), pa.int32()), nullable=False), # can support upcasting integer to long
+ pa.field("double", pa.float32(), nullable=True), # can support upcasting float to double
+ pa.field("uuid", pa.binary(length=16), nullable=True), # can support upcasting float to double
+ )
+ )
@pytest.fixture(scope="session")
diff --git a/tests/expressions/test_evaluator.py b/tests/expressions/test_evaluator.py
index f8a9a8806d..e2b1f27377 100644
--- a/tests/expressions/test_evaluator.py
+++ b/tests/expressions/test_evaluator.py
@@ -681,25 +681,25 @@ def data_file_nan() -> DataFile:
def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_file_nan: Schema, data_file_nan: DataFile) -> None:
for operator in [LessThan, LessThanOrEqual]:
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert not should_read, "Should not match: all nan column doesn't contain number"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 1)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert not should_read, "Should not match: 1 is smaller than lower bound"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 10)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 10)).eval(data_file_nan) # type: ignore[arg-type]
assert should_read, "Should match: 10 is larger than lower bound"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("min_max_nan", 1)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("min_max_nan", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert should_read, "Should match: no visibility"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan_null_bounds", 1)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan_null_bounds", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert not should_read, "Should not match: all nan column doesn't contain number"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 1)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert not should_read, "Should not match: 1 is smaller than lower bound"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 10)).eval(
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 10)).eval( # type: ignore[arg-type]
data_file_nan
)
assert should_read, "Should match: 10 larger than lower bound"
@@ -709,30 +709,30 @@ def test_inclusive_metrics_evaluator_greater_than_and_greater_than_equal(
schema_data_file_nan: Schema, data_file_nan: DataFile
) -> None:
for operator in [GreaterThan, GreaterThanOrEqual]:
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert not should_read, "Should not match: all nan column doesn't contain number"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 1)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert should_read, "Should match: upper bound is larger than 1"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 10)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("max_nan", 10)).eval(data_file_nan) # type: ignore[arg-type]
assert should_read, "Should match: upper bound is larger than 10"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("min_max_nan", 1)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("min_max_nan", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert should_read, "Should match: no visibility"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan_null_bounds", 1)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan_null_bounds", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert not should_read, "Should not match: all nan column doesn't contain number"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 1)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert should_read, "Should match: 1 is smaller than upper bound"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 10)).eval(
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("some_nan_correct_bounds", 10)).eval( # type: ignore[arg-type]
data_file_nan
)
assert should_read, "Should match: 10 is smaller than upper bound"
- should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 30)).eval(data_file_nan)
+ should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 30)).eval(data_file_nan) # type: ignore[arg-type]
assert not should_read, "Should not match: 30 is greater than upper bound"
diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py
index 085150edec..9d7a3ac094 100644
--- a/tests/expressions/test_parser.py
+++ b/tests/expressions/test_parser.py
@@ -70,7 +70,6 @@ def test_equals_false() -> None:
def test_is_null() -> None:
assert IsNull("foo") == parser.parse("foo is null")
assert IsNull("foo") == parser.parse("foo IS NULL")
- assert IsNull("foo") == parser.parse("table.foo IS NULL")
def test_not_null() -> None:
diff --git a/tests/expressions/test_visitors.py b/tests/expressions/test_visitors.py
index d61c193719..94bfcf076c 100644
--- a/tests/expressions/test_visitors.py
+++ b/tests/expressions/test_visitors.py
@@ -947,95 +947,95 @@ def manifest() -> ManifestFile:
def test_all_nulls(schema: Schema, manifest: ManifestFile) -> None:
- assert not _ManifestEvalVisitor(schema, NotNull(Reference("all_nulls_missing_nan")), case_sensitive=True).eval(manifest), (
- "Should skip: all nulls column with non-floating type contains all null"
- )
+ assert not _ManifestEvalVisitor(schema, NotNull(Reference("all_nulls_missing_nan")), case_sensitive=True).eval(
+ manifest
+ ), "Should skip: all nulls column with non-floating type contains all null"
- assert _ManifestEvalVisitor(schema, NotNull(Reference("all_nulls_missing_nan_float")), case_sensitive=True).eval(manifest), (
- "Should read: no NaN information may indicate presence of NaN value"
- )
+ assert _ManifestEvalVisitor(schema, NotNull(Reference("all_nulls_missing_nan_float")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: no NaN information may indicate presence of NaN value"
- assert _ManifestEvalVisitor(schema, NotNull(Reference("some_nulls")), case_sensitive=True).eval(manifest), (
- "Should read: column with some nulls contains a non-null value"
- )
+ assert _ManifestEvalVisitor(schema, NotNull(Reference("some_nulls")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: column with some nulls contains a non-null value"
- assert _ManifestEvalVisitor(schema, NotNull(Reference("no_nulls")), case_sensitive=True).eval(manifest), (
- "Should read: non-null column contains a non-null value"
- )
+ assert _ManifestEvalVisitor(schema, NotNull(Reference("no_nulls")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: non-null column contains a non-null value"
def test_no_nulls(schema: Schema, manifest: ManifestFile) -> None:
- assert _ManifestEvalVisitor(schema, IsNull(Reference("all_nulls_missing_nan")), case_sensitive=True).eval(manifest), (
- "Should read: at least one null value in all null column"
- )
+ assert _ManifestEvalVisitor(schema, IsNull(Reference("all_nulls_missing_nan")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: at least one null value in all null column"
- assert _ManifestEvalVisitor(schema, IsNull(Reference("some_nulls")), case_sensitive=True).eval(manifest), (
- "Should read: column with some nulls contains a null value"
- )
+ assert _ManifestEvalVisitor(schema, IsNull(Reference("some_nulls")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: column with some nulls contains a null value"
- assert not _ManifestEvalVisitor(schema, IsNull(Reference("no_nulls")), case_sensitive=True).eval(manifest), (
- "Should skip: non-null column contains no null values"
- )
+ assert not _ManifestEvalVisitor(schema, IsNull(Reference("no_nulls")), case_sensitive=True).eval(
+ manifest
+ ), "Should skip: non-null column contains no null values"
- assert _ManifestEvalVisitor(schema, IsNull(Reference("both_nan_and_null")), case_sensitive=True).eval(manifest), (
- "Should read: both_nan_and_null column contains no null values"
- )
+ assert _ManifestEvalVisitor(schema, IsNull(Reference("both_nan_and_null")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: both_nan_and_null column contains no null values"
def test_is_nan(schema: Schema, manifest: ManifestFile) -> None:
- assert _ManifestEvalVisitor(schema, IsNaN(Reference("float")), case_sensitive=True).eval(manifest), (
- "Should read: no information on if there are nan value in float column"
- )
+ assert _ManifestEvalVisitor(schema, IsNaN(Reference("float")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: no information on if there are nan value in float column"
- assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_double")), case_sensitive=True).eval(manifest), (
- "Should read: no NaN information may indicate presence of NaN value"
- )
+ assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_double")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: no NaN information may indicate presence of NaN value"
- assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_missing_nan_float")), case_sensitive=True).eval(manifest), (
- "Should read: no NaN information may indicate presence of NaN value"
- )
+ assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_missing_nan_float")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: no NaN information may indicate presence of NaN value"
- assert not _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_no_nans")), case_sensitive=True).eval(manifest), (
- "Should skip: no nan column doesn't contain nan value"
- )
+ assert not _ManifestEvalVisitor(schema, IsNaN(Reference("all_nulls_no_nans")), case_sensitive=True).eval(
+ manifest
+ ), "Should skip: no nan column doesn't contain nan value"
- assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nans")), case_sensitive=True).eval(manifest), (
- "Should read: all_nans column contains nan value"
- )
+ assert _ManifestEvalVisitor(schema, IsNaN(Reference("all_nans")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: all_nans column contains nan value"
- assert _ManifestEvalVisitor(schema, IsNaN(Reference("both_nan_and_null")), case_sensitive=True).eval(manifest), (
- "Should read: both_nan_and_null column contains nan value"
- )
+ assert _ManifestEvalVisitor(schema, IsNaN(Reference("both_nan_and_null")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: both_nan_and_null column contains nan value"
- assert not _ManifestEvalVisitor(schema, IsNaN(Reference("no_nan_or_null")), case_sensitive=True).eval(manifest), (
- "Should skip: no_nan_or_null column doesn't contain nan value"
- )
+ assert not _ManifestEvalVisitor(schema, IsNaN(Reference("no_nan_or_null")), case_sensitive=True).eval(
+ manifest
+ ), "Should skip: no_nan_or_null column doesn't contain nan value"
def test_not_nan(schema: Schema, manifest: ManifestFile) -> None:
- assert _ManifestEvalVisitor(schema, NotNaN(Reference("float")), case_sensitive=True).eval(manifest), (
- "Should read: no information on if there are nan value in float column"
- )
+ assert _ManifestEvalVisitor(schema, NotNaN(Reference("float")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: no information on if there are nan value in float column"
- assert _ManifestEvalVisitor(schema, NotNaN(Reference("all_nulls_double")), case_sensitive=True).eval(manifest), (
- "Should read: all null column contains non nan value"
- )
+ assert _ManifestEvalVisitor(schema, NotNaN(Reference("all_nulls_double")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: all null column contains non nan value"
- assert _ManifestEvalVisitor(schema, NotNaN(Reference("all_nulls_no_nans")), case_sensitive=True).eval(manifest), (
- "Should read: no_nans column contains non nan value"
- )
+ assert _ManifestEvalVisitor(schema, NotNaN(Reference("all_nulls_no_nans")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: no_nans column contains non nan value"
- assert not _ManifestEvalVisitor(schema, NotNaN(Reference("all_nans")), case_sensitive=True).eval(manifest), (
- "Should skip: all nans column doesn't contain non nan value"
- )
+ assert not _ManifestEvalVisitor(schema, NotNaN(Reference("all_nans")), case_sensitive=True).eval(
+ manifest
+ ), "Should skip: all nans column doesn't contain non nan value"
- assert _ManifestEvalVisitor(schema, NotNaN(Reference("both_nan_and_null")), case_sensitive=True).eval(manifest), (
- "Should read: both_nan_and_null nans column contains non nan value"
- )
+ assert _ManifestEvalVisitor(schema, NotNaN(Reference("both_nan_and_null")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: both_nan_and_null nans column contains non nan value"
- assert _ManifestEvalVisitor(schema, NotNaN(Reference("no_nan_or_null")), case_sensitive=True).eval(manifest), (
- "Should read: no_nan_or_null column contains non nan value"
- )
+ assert _ManifestEvalVisitor(schema, NotNaN(Reference("no_nan_or_null")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: no_nan_or_null column contains non nan value"
def test_missing_stats(schema: Schema, manifest_no_stats: ManifestFile) -> None:
@@ -1053,15 +1053,15 @@ def test_missing_stats(schema: Schema, manifest_no_stats: ManifestFile) -> None:
]
for expr in expressions:
- assert _ManifestEvalVisitor(schema, expr, case_sensitive=True).eval(manifest_no_stats), (
- f"Should read when missing stats for expr: {expr}"
- )
+ assert _ManifestEvalVisitor(schema, expr, case_sensitive=True).eval(
+ manifest_no_stats
+ ), f"Should read when missing stats for expr: {expr}"
def test_not(schema: Schema, manifest: ManifestFile) -> None:
- assert _ManifestEvalVisitor(schema, Not(LessThan(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval(manifest), (
- "Should read: not(false)"
- )
+ assert _ManifestEvalVisitor(schema, Not(LessThan(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval(
+ manifest
+ ), "Should read: not(false)"
assert not _ManifestEvalVisitor(schema, Not(GreaterThan(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval(
manifest
@@ -1118,21 +1118,21 @@ def test_or(schema: Schema, manifest: ManifestFile) -> None:
def test_integer_lt(schema: Schema, manifest: ManifestFile) -> None:
- assert not _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval(manifest), (
- "Should not read: id range below lower bound (5 < 30)"
- )
+ assert not _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval(
+ manifest
+ ), "Should not read: id range below lower bound (5 < 30)"
- assert not _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(manifest), (
- "Should not read: id range below lower bound (30 is not < 30)"
- )
+ assert not _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(
+ manifest
+ ), "Should not read: id range below lower bound (30 is not < 30)"
- assert _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE + 1), case_sensitive=True).eval(manifest), (
- "Should read: one possible id"
- )
+ assert _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MIN_VALUE + 1), case_sensitive=True).eval(
+ manifest
+ ), "Should read: one possible id"
- assert _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), (
- "Should read: may possible ids"
- )
+ assert _ManifestEvalVisitor(schema, LessThan(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(
+ manifest
+ ), "Should read: may possible ids"
def test_integer_lt_eq(schema: Schema, manifest: ManifestFile) -> None:
@@ -1144,13 +1144,13 @@ def test_integer_lt_eq(schema: Schema, manifest: ManifestFile) -> None:
manifest
), "Should not read: id range below lower bound (29 < 30)"
- assert _ManifestEvalVisitor(schema, LessThanOrEqual(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(manifest), (
- "Should read: one possible id"
- )
+ assert _ManifestEvalVisitor(schema, LessThanOrEqual(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(
+ manifest
+ ), "Should read: one possible id"
- assert _ManifestEvalVisitor(schema, LessThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), (
- "Should read: many possible ids"
- )
+ assert _ManifestEvalVisitor(schema, LessThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(
+ manifest
+ ), "Should read: many possible ids"
def test_integer_gt(schema: Schema, manifest: ManifestFile) -> None:
@@ -1158,17 +1158,17 @@ def test_integer_gt(schema: Schema, manifest: ManifestFile) -> None:
manifest
), "Should not read: id range above upper bound (85 < 79)"
- assert not _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), (
- "Should not read: id range above upper bound (79 is not > 79)"
- )
+ assert not _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(
+ manifest
+ ), "Should not read: id range above upper bound (79 is not > 79)"
- assert _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE - 1), case_sensitive=True).eval(manifest), (
- "Should read: one possible id"
- )
+ assert _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE - 1), case_sensitive=True).eval(
+ manifest
+ ), "Should read: one possible id"
- assert _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval(manifest), (
- "Should read: may possible ids"
- )
+ assert _ManifestEvalVisitor(schema, GreaterThan(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval(
+ manifest
+ ), "Should read: may possible ids"
def test_integer_gt_eq(schema: Schema, manifest: ManifestFile) -> None:
@@ -1180,133 +1180,133 @@ def test_integer_gt_eq(schema: Schema, manifest: ManifestFile) -> None:
manifest
), "Should not read: id range above upper bound (80 > 79)"
- assert _ManifestEvalVisitor(schema, GreaterThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), (
- "Should read: one possible id"
- )
+ assert _ManifestEvalVisitor(schema, GreaterThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(
+ manifest
+ ), "Should read: one possible id"
- assert _ManifestEvalVisitor(schema, GreaterThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), (
- "Should read: may possible ids"
- )
+ assert _ManifestEvalVisitor(schema, GreaterThanOrEqual(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(
+ manifest
+ ), "Should read: may possible ids"
def test_integer_eq(schema: Schema, manifest: ManifestFile) -> None:
- assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval(manifest), (
- "Should not read: id below lower bound"
- )
+ assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval(
+ manifest
+ ), "Should not read: id below lower bound"
- assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE - 1), case_sensitive=True).eval(manifest), (
- "Should not read: id below lower bound"
- )
+ assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE - 1), case_sensitive=True).eval(
+ manifest
+ ), "Should not read: id below lower bound"
- assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(manifest), (
- "Should read: id equal to lower bound"
- )
+ assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id equal to lower bound"
- assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval(manifest), (
- "Should read: id between lower and upper bounds"
- )
+ assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id between lower and upper bounds"
- assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), (
- "Should read: id equal to upper bound"
- )
+ assert _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id equal to upper bound"
- assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE + 1), case_sensitive=True).eval(manifest), (
- "Should not read: id above upper bound"
- )
+ assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE + 1), case_sensitive=True).eval(
+ manifest
+ ), "Should not read: id above upper bound"
- assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE + 6), case_sensitive=True).eval(manifest), (
- "Should not read: id above upper bound"
- )
+ assert not _ManifestEvalVisitor(schema, EqualTo(Reference("id"), INT_MAX_VALUE + 6), case_sensitive=True).eval(
+ manifest
+ ), "Should not read: id above upper bound"
def test_integer_not_eq(schema: Schema, manifest: ManifestFile) -> None:
- assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval(manifest), (
- "Should read: id below lower bound"
- )
+ assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE - 25), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id below lower bound"
- assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE - 1), case_sensitive=True).eval(manifest), (
- "Should read: id below lower bound"
- )
+ assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE - 1), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id below lower bound"
- assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(manifest), (
- "Should read: id equal to lower bound"
- )
+ assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MIN_VALUE), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id equal to lower bound"
- assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval(manifest), (
- "Should read: id between lower and upper bounds"
- )
+ assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE - 4), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id between lower and upper bounds"
- assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(manifest), (
- "Should read: id equal to upper bound"
- )
+ assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id equal to upper bound"
- assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE + 1), case_sensitive=True).eval(manifest), (
- "Should read: id above upper bound"
- )
+ assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE + 1), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id above upper bound"
- assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE + 6), case_sensitive=True).eval(manifest), (
- "Should read: id above upper bound"
- )
+ assert _ManifestEvalVisitor(schema, NotEqualTo(Reference("id"), INT_MAX_VALUE + 6), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id above upper bound"
def test_integer_not_eq_rewritten(schema: Schema, manifest: ManifestFile) -> None:
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval(manifest), (
- "Should read: id below lower bound"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE - 25)), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id below lower bound"
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE - 1)), case_sensitive=True).eval(manifest), (
- "Should read: id below lower bound"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE - 1)), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id below lower bound"
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE)), case_sensitive=True).eval(manifest), (
- "Should read: id equal to lower bound"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MIN_VALUE)), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id equal to lower bound"
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE - 4)), case_sensitive=True).eval(manifest), (
- "Should read: id between lower and upper bounds"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE - 4)), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id between lower and upper bounds"
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE)), case_sensitive=True).eval(manifest), (
- "Should read: id equal to upper bound"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE)), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id equal to upper bound"
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE + 1)), case_sensitive=True).eval(manifest), (
- "Should read: id above upper bound"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE + 1)), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id above upper bound"
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE + 6)), case_sensitive=True).eval(manifest), (
- "Should read: id above upper bound"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("id"), INT_MAX_VALUE + 6)), case_sensitive=True).eval(
+ manifest
+ ), "Should read: id above upper bound"
def test_integer_not_eq_rewritten_case_insensitive(schema: Schema, manifest: ManifestFile) -> None:
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE - 25)), case_sensitive=False).eval(manifest), (
- "Should read: id below lower bound"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE - 25)), case_sensitive=False).eval(
+ manifest
+ ), "Should read: id below lower bound"
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE - 1)), case_sensitive=False).eval(manifest), (
- "Should read: id below lower bound"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE - 1)), case_sensitive=False).eval(
+ manifest
+ ), "Should read: id below lower bound"
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE)), case_sensitive=False).eval(manifest), (
- "Should read: id equal to lower bound"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MIN_VALUE)), case_sensitive=False).eval(
+ manifest
+ ), "Should read: id equal to lower bound"
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE - 4)), case_sensitive=False).eval(manifest), (
- "Should read: id between lower and upper bounds"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE - 4)), case_sensitive=False).eval(
+ manifest
+ ), "Should read: id between lower and upper bounds"
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE)), case_sensitive=False).eval(manifest), (
- "Should read: id equal to upper bound"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE)), case_sensitive=False).eval(
+ manifest
+ ), "Should read: id equal to upper bound"
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE + 1)), case_sensitive=False).eval(manifest), (
- "Should read: id above upper bound"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE + 1)), case_sensitive=False).eval(
+ manifest
+ ), "Should read: id above upper bound"
- assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE + 6)), case_sensitive=False).eval(manifest), (
- "Should read: id above upper bound"
- )
+ assert _ManifestEvalVisitor(schema, Not(EqualTo(Reference("ID"), INT_MAX_VALUE + 6)), case_sensitive=False).eval(
+ manifest
+ ), "Should read: id above upper bound"
def test_integer_in(schema: Schema, manifest: ManifestFile) -> None:
@@ -1342,13 +1342,13 @@ def test_integer_in(schema: Schema, manifest: ManifestFile) -> None:
manifest
), "Should skip: in on all nulls column"
- assert _ManifestEvalVisitor(schema, In(Reference("some_nulls"), ("abc", "def")), case_sensitive=True).eval(manifest), (
- "Should read: in on some nulls column"
- )
+ assert _ManifestEvalVisitor(schema, In(Reference("some_nulls"), ("abc", "def")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: in on some nulls column"
- assert _ManifestEvalVisitor(schema, In(Reference("no_nulls"), ("abc", "def")), case_sensitive=True).eval(manifest), (
- "Should read: in on no nulls column"
- )
+ assert _ManifestEvalVisitor(schema, In(Reference("no_nulls"), ("abc", "def")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: in on no nulls column"
def test_integer_not_in(schema: Schema, manifest: ManifestFile) -> None:
@@ -1384,73 +1384,73 @@ def test_integer_not_in(schema: Schema, manifest: ManifestFile) -> None:
manifest
), "Should read: notIn on no nulls column"
- assert _ManifestEvalVisitor(schema, NotIn(Reference("some_nulls"), ("abc", "def")), case_sensitive=True).eval(manifest), (
- "Should read: in on some nulls column"
- )
+ assert _ManifestEvalVisitor(schema, NotIn(Reference("some_nulls"), ("abc", "def")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: in on some nulls column"
- assert _ManifestEvalVisitor(schema, NotIn(Reference("no_nulls"), ("abc", "def")), case_sensitive=True).eval(manifest), (
- "Should read: in on no nulls column"
- )
+ assert _ManifestEvalVisitor(schema, NotIn(Reference("no_nulls"), ("abc", "def")), case_sensitive=True).eval(
+ manifest
+ ), "Should read: in on no nulls column"
def test_string_starts_with(schema: Schema, manifest: ManifestFile) -> None:
- assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "a"), case_sensitive=False).eval(manifest), (
- "Should read: range matches"
- )
+ assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "a"), case_sensitive=False).eval(
+ manifest
+ ), "Should read: range matches"
- assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "aa"), case_sensitive=False).eval(manifest), (
- "Should read: range matches"
- )
+ assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "aa"), case_sensitive=False).eval(
+ manifest
+ ), "Should read: range matches"
- assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "dddd"), case_sensitive=False).eval(manifest), (
- "Should read: range matches"
- )
+ assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "dddd"), case_sensitive=False).eval(
+ manifest
+ ), "Should read: range matches"
- assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "z"), case_sensitive=False).eval(manifest), (
- "Should read: range matches"
- )
+ assert _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "z"), case_sensitive=False).eval(
+ manifest
+ ), "Should read: range matches"
- assert _ManifestEvalVisitor(schema, StartsWith(Reference("no_nulls"), "a"), case_sensitive=False).eval(manifest), (
- "Should read: range matches"
- )
+ assert _ManifestEvalVisitor(schema, StartsWith(Reference("no_nulls"), "a"), case_sensitive=False).eval(
+ manifest
+ ), "Should read: range matches"
- assert not _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "zzzz"), case_sensitive=False).eval(manifest), (
- "Should skip: range doesn't match"
- )
+ assert not _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "zzzz"), case_sensitive=False).eval(
+ manifest
+ ), "Should skip: range doesn't match"
- assert not _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "1"), case_sensitive=False).eval(manifest), (
- "Should skip: range doesn't match"
- )
+ assert not _ManifestEvalVisitor(schema, StartsWith(Reference("some_nulls"), "1"), case_sensitive=False).eval(
+ manifest
+ ), "Should skip: range doesn't match"
def test_string_not_starts_with(schema: Schema, manifest: ManifestFile) -> None:
- assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "a"), case_sensitive=False).eval(manifest), (
- "Should read: range matches"
- )
+ assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "a"), case_sensitive=False).eval(
+ manifest
+ ), "Should read: range matches"
- assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "aa"), case_sensitive=False).eval(manifest), (
- "Should read: range matches"
- )
+ assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "aa"), case_sensitive=False).eval(
+ manifest
+ ), "Should read: range matches"
- assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "dddd"), case_sensitive=False).eval(manifest), (
- "Should read: range matches"
- )
+ assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "dddd"), case_sensitive=False).eval(
+ manifest
+ ), "Should read: range matches"
- assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "z"), case_sensitive=False).eval(manifest), (
- "Should read: range matches"
- )
+ assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "z"), case_sensitive=False).eval(
+ manifest
+ ), "Should read: range matches"
- assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("no_nulls"), "a"), case_sensitive=False).eval(manifest), (
- "Should read: range matches"
- )
+ assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("no_nulls"), "a"), case_sensitive=False).eval(
+ manifest
+ ), "Should read: range matches"
- assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "zzzz"), case_sensitive=False).eval(manifest), (
- "Should read: range matches"
- )
+ assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "zzzz"), case_sensitive=False).eval(
+ manifest
+ ), "Should read: range matches"
- assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "1"), case_sensitive=False).eval(manifest), (
- "Should read: range matches"
- )
+ assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("some_nulls"), "1"), case_sensitive=False).eval(
+ manifest
+ ), "Should read: range matches"
assert _ManifestEvalVisitor(schema, NotStartsWith(Reference("all_same_value_or_null"), "a"), case_sensitive=False).eval(
manifest
diff --git a/tests/integration/test_add_files.py b/tests/integration/test_add_files.py
index 85e626edf4..c1d916e0e0 100644
--- a/tests/integration/test_add_files.py
+++ b/tests/integration/test_add_files.py
@@ -52,12 +52,14 @@
NestedField(field_id=10, name="qux", field_type=DateType(), required=False),
)
-ARROW_SCHEMA = pa.schema([
- ("foo", pa.bool_()),
- ("bar", pa.string()),
- ("baz", pa.int32()),
- ("qux", pa.date32()),
-])
+ARROW_SCHEMA = pa.schema(
+ [
+ ("foo", pa.bool_()),
+ ("bar", pa.string()),
+ ("baz", pa.int32()),
+ ("qux", pa.date32()),
+ ]
+)
ARROW_TABLE = pa.Table.from_pylist(
[
@@ -71,12 +73,14 @@
schema=ARROW_SCHEMA,
)
-ARROW_SCHEMA_WITH_IDS = pa.schema([
- pa.field("foo", pa.bool_(), nullable=False, metadata={"PARQUET:field_id": "1"}),
- pa.field("bar", pa.string(), nullable=False, metadata={"PARQUET:field_id": "2"}),
- pa.field("baz", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "3"}),
- pa.field("qux", pa.date32(), nullable=False, metadata={"PARQUET:field_id": "4"}),
-])
+ARROW_SCHEMA_WITH_IDS = pa.schema(
+ [
+ pa.field("foo", pa.bool_(), nullable=False, metadata={"PARQUET:field_id": "1"}),
+ pa.field("bar", pa.string(), nullable=False, metadata={"PARQUET:field_id": "2"}),
+ pa.field("baz", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "3"}),
+ pa.field("qux", pa.date32(), nullable=False, metadata={"PARQUET:field_id": "4"}),
+ ]
+)
ARROW_TABLE_WITH_IDS = pa.Table.from_pylist(
@@ -91,12 +95,14 @@
schema=ARROW_SCHEMA_WITH_IDS,
)
-ARROW_SCHEMA_UPDATED = pa.schema([
- ("foo", pa.bool_()),
- ("baz", pa.int32()),
- ("qux", pa.date32()),
- ("quux", pa.int32()),
-])
+ARROW_SCHEMA_UPDATED = pa.schema(
+ [
+ ("foo", pa.bool_()),
+ ("baz", pa.int32()),
+ ("qux", pa.date32()),
+ ("quux", pa.int32()),
+ ]
+)
ARROW_TABLE_UPDATED = pa.Table.from_pylist(
[
@@ -471,12 +477,14 @@ def test_add_files_fails_on_schema_mismatch(spark: SparkSession, session_catalog
identifier = f"default.table_schema_mismatch_fails_v{format_version}"
tbl = _create_table(session_catalog, identifier, format_version)
- WRONG_SCHEMA = pa.schema([
- ("foo", pa.bool_()),
- ("bar", pa.string()),
- ("baz", pa.string()), # should be integer
- ("qux", pa.date32()),
- ])
+ WRONG_SCHEMA = pa.schema(
+ [
+ ("foo", pa.bool_()),
+ ("bar", pa.string()),
+ ("baz", pa.string()), # should be integer
+ ("qux", pa.date32()),
+ ]
+ )
file_path = f"s3://warehouse/default/table_schema_mismatch_fails/v{format_version}/test.parquet"
# write parquet files
fo = tbl.io.new_output(file_path)
@@ -522,12 +530,16 @@ def test_add_files_with_large_and_regular_schema(spark: SparkSession, session_ca
identifier = f"default.unpartitioned_with_large_types{format_version}"
iceberg_schema = Schema(NestedField(1, "foo", StringType(), required=True))
- arrow_schema = pa.schema([
- pa.field("foo", pa.string(), nullable=False),
- ])
- arrow_schema_large = pa.schema([
- pa.field("foo", pa.large_string(), nullable=False),
- ])
+ arrow_schema = pa.schema(
+ [
+ pa.field("foo", pa.string(), nullable=False),
+ ]
+ )
+ arrow_schema_large = pa.schema(
+ [
+ pa.field("foo", pa.large_string(), nullable=False),
+ ]
+ )
tbl = _create_table(session_catalog, identifier, format_version, schema=iceberg_schema)
@@ -576,9 +588,11 @@ def test_add_files_with_large_and_regular_schema(spark: SparkSession, session_ca
def test_add_files_with_timestamp_tz_ns_fails(session_catalog: Catalog, format_version: int, mocker: MockerFixture) -> None:
nanoseconds_schema_iceberg = Schema(NestedField(1, "quux", TimestamptzType()))
- nanoseconds_schema = pa.schema([
- ("quux", pa.timestamp("ns", tz="UTC")),
- ])
+ nanoseconds_schema = pa.schema(
+ [
+ ("quux", pa.timestamp("ns", tz="UTC")),
+ ]
+ )
arrow_table = pa.Table.from_pylist(
[
@@ -617,9 +631,11 @@ def test_add_file_with_valid_nullability_diff(spark: SparkSession, session_catal
table_schema = Schema(
NestedField(field_id=1, name="long", field_type=LongType(), required=False),
)
- other_schema = pa.schema((
- pa.field("long", pa.int64(), nullable=False), # can support writing required pyarrow field to optional Iceberg field
- ))
+ other_schema = pa.schema(
+ (
+ pa.field("long", pa.int64(), nullable=False), # can support writing required pyarrow field to optional Iceberg field
+ )
+ )
arrow_table = pa.Table.from_pydict(
{
"long": [1, 9],
@@ -671,13 +687,15 @@ def test_add_files_with_valid_upcast(
# table's long field should cast to long on read
written_arrow_table = tbl.scan().to_arrow()
assert written_arrow_table == pyarrow_table_with_promoted_types.cast(
- pa.schema((
- pa.field("long", pa.int64(), nullable=True),
- pa.field("list", pa.large_list(pa.int64()), nullable=False),
- pa.field("map", pa.map_(pa.large_string(), pa.int64()), nullable=False),
- pa.field("double", pa.float64(), nullable=True),
- pa.field("uuid", pa.binary(length=16), nullable=True), # can UUID is read as fixed length binary of length 16
- ))
+ pa.schema(
+ (
+ pa.field("long", pa.int64(), nullable=True),
+ pa.field("list", pa.large_list(pa.int64()), nullable=False),
+ pa.field("map", pa.map_(pa.large_string(), pa.int64()), nullable=False),
+ pa.field("double", pa.float64(), nullable=True),
+                pa.field("uuid", pa.binary(length=16), nullable=True),  # UUID is read as fixed-length binary of length 16
+ )
+ )
)
lhs = spark.table(f"{identifier}").toPandas()
rhs = written_arrow_table.to_pandas()
diff --git a/tests/integration/test_deletes.py b/tests/integration/test_deletes.py
index f2417bde2d..ae03beea53 100644
--- a/tests/integration/test_deletes.py
+++ b/tests/integration/test_deletes.py
@@ -746,13 +746,15 @@ def test_delete_after_partition_evolution_from_partitioned(session_catalog: Rest
arrow_table = pa.Table.from_arrays(
[
pa.array([2, 3, 4, 5, 6]),
- pa.array([
- datetime(2021, 5, 19),
- datetime(2022, 7, 25),
- datetime(2023, 3, 22),
- datetime(2024, 7, 17),
- datetime(2025, 2, 22),
- ]),
+ pa.array(
+ [
+ datetime(2021, 5, 19),
+ datetime(2022, 7, 25),
+ datetime(2023, 3, 22),
+ datetime(2024, 7, 17),
+ datetime(2025, 2, 22),
+ ]
+ ),
],
names=["idx", "ts"],
)
diff --git a/tests/integration/test_inspect_table.py b/tests/integration/test_inspect_table.py
index 68b10f3262..75fe92a69a 100644
--- a/tests/integration/test_inspect_table.py
+++ b/tests/integration/test_inspect_table.py
@@ -846,3 +846,95 @@ def inspect_files_asserts(df: pa.Table) -> None:
inspect_files_asserts(files_df)
inspect_files_asserts(data_files_df)
inspect_files_asserts(delete_files_df)
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("format_version", [1, 2])
+def test_inspect_all_manifests(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None:
+ from pandas.testing import assert_frame_equal
+
+ identifier = "default.table_metadata_all_manifests"
+ try:
+ session_catalog.drop_table(identifier=identifier)
+ except NoSuchTableError:
+ pass
+
+ spark.sql(
+ f"""
+ CREATE TABLE {identifier} (
+ id int,
+ data string
+ )
+ PARTITIONED BY (data)
+ TBLPROPERTIES ('write.update.mode'='merge-on-read',
+ 'write.delete.mode'='merge-on-read')
+ """
+ )
+ tbl = session_catalog.load_table(identifier)
+
+ # check all_manifests when there are no snapshots
+ lhs = tbl.inspect.all_manifests().to_pandas()
+ rhs = spark.table(f"{identifier}.all_manifests").toPandas()
+ assert_frame_equal(lhs, rhs, check_dtype=False)
+
+ spark.sql(f"INSERT INTO {identifier} VALUES (1, 'a')")
+
+ spark.sql(f"INSERT INTO {identifier} VALUES (2, 'b')")
+
+ spark.sql(f"UPDATE {identifier} SET data = 'c' WHERE id = 1")
+
+ spark.sql(f"DELETE FROM {identifier} WHERE id = 2")
+
+ spark.sql(f"INSERT OVERWRITE {identifier} VALUES (1, 'a')")
+
+ tbl.refresh()
+ df = tbl.inspect.all_manifests()
+
+ assert df.column_names == [
+ "content",
+ "path",
+ "length",
+ "partition_spec_id",
+ "added_snapshot_id",
+ "added_data_files_count",
+ "existing_data_files_count",
+ "deleted_data_files_count",
+ "added_delete_files_count",
+ "existing_delete_files_count",
+ "deleted_delete_files_count",
+ "partition_summaries",
+ "reference_snapshot_id",
+ ]
+
+ int_cols = [
+ "content",
+ "length",
+ "partition_spec_id",
+ "added_snapshot_id",
+ "added_data_files_count",
+ "existing_data_files_count",
+ "deleted_data_files_count",
+ "added_delete_files_count",
+ "existing_delete_files_count",
+ "deleted_delete_files_count",
+ "reference_snapshot_id",
+ ]
+
+ for column in int_cols:
+ for value in df[column]:
+ assert isinstance(value.as_py(), int)
+
+ for value in df["path"]:
+ assert isinstance(value.as_py(), str)
+
+ for value in df["partition_summaries"]:
+ assert isinstance(value.as_py(), list)
+ for row in value:
+ assert isinstance(row["contains_null"].as_py(), bool)
+ assert isinstance(row["contains_nan"].as_py(), (bool, type(None)))
+ assert isinstance(row["lower_bound"].as_py(), (str, type(None)))
+ assert isinstance(row["upper_bound"].as_py(), (str, type(None)))
+
+ lhs = spark.table(f"{identifier}.all_manifests").toPandas()
+ rhs = df.to_pandas()
+ assert_frame_equal(lhs, rhs, check_dtype=False)
diff --git a/tests/integration/test_partitioning_key.py b/tests/integration/test_partitioning_key.py
index 29f664909c..3955259d33 100644
--- a/tests/integration/test_partitioning_key.py
+++ b/tests/integration/test_partitioning_key.py
@@ -26,7 +26,7 @@
from pyiceberg.catalog import Catalog
from pyiceberg.partitioning import PartitionField, PartitionFieldValue, PartitionKey, PartitionSpec
-from pyiceberg.schema import Schema
+from pyiceberg.schema import Schema, make_compatible_name
from pyiceberg.transforms import (
BucketTransform,
DayTransform,
@@ -70,6 +70,7 @@
NestedField(field_id=12, name="fixed_field", field_type=FixedType(16), required=False),
NestedField(field_id=13, name="decimal_field", field_type=DecimalType(5, 2), required=False),
NestedField(field_id=14, name="uuid_field", field_type=UUIDType(), required=False),
+ NestedField(field_id=15, name="special#string+field", field_type=StringType(), required=False),
)
@@ -722,6 +723,25 @@
(CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP), CAST('2023-01-01' AS DATE), 'some data');
""",
),
+ # Test that special characters are URL-encoded
+ (
+ [PartitionField(source_id=15, field_id=1001, transform=IdentityTransform(), name="special#string+field")],
+ ["special string"],
+ Record(**{"special#string+field": "special string"}), # type: ignore
+ "special%23string%2Bfield=special+string",
+ f"""CREATE TABLE {identifier} (
+ `special#string+field` string
+ )
+ USING iceberg
+ PARTITIONED BY (
+ identity(`special#string+field`)
+ )
+ """,
+ f"""INSERT INTO {identifier}
+ VALUES
+ ('special string')
+ """,
+ ),
],
)
@pytest.mark.integration
@@ -768,5 +788,7 @@ def test_partition_key(
spark_path_for_justification = (
snapshot.manifests(iceberg_table.io)[0].fetch_manifest_entry(iceberg_table.io)[0].data_file.file_path
)
- assert spark_partition_for_justification == expected_partition_record
+ # Special characters in partition value are sanitized when written to the data file's partition field
+ sanitized_record = Record(**{make_compatible_name(k): v for k, v in vars(expected_partition_record).items()})
+ assert spark_partition_for_justification == sanitized_record
assert expected_hive_partition_path_slice in spark_path_for_justification
diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
index 0279c2199a..f2e79bae60 100644
--- a/tests/integration/test_reads.py
+++ b/tests/integration/test_reads.py
@@ -19,6 +19,7 @@
import math
import time
import uuid
+from pathlib import PosixPath
from urllib.parse import urlparse
import pyarrow as pa
@@ -833,12 +834,14 @@ def test_table_scan_default_to_large_types(catalog: Catalog) -> None:
result_table = tbl.scan().to_arrow()
- expected_schema = pa.schema([
- pa.field("string", pa.large_string()),
- pa.field("string-to-binary", pa.large_binary()),
- pa.field("binary", pa.large_binary()),
- pa.field("list", pa.large_list(pa.large_string())),
- ])
+ expected_schema = pa.schema(
+ [
+ pa.field("string", pa.large_string()),
+ pa.field("string-to-binary", pa.large_binary()),
+ pa.field("binary", pa.large_binary()),
+ pa.field("list", pa.large_list(pa.large_string())),
+ ]
+ )
assert result_table.schema.equals(expected_schema)
@@ -874,12 +877,14 @@ def test_table_scan_override_with_small_types(catalog: Catalog) -> None:
tbl.io.properties[PYARROW_USE_LARGE_TYPES_ON_READ] = "False"
result_table = tbl.scan().to_arrow()
- expected_schema = pa.schema([
- pa.field("string", pa.string()),
- pa.field("string-to-binary", pa.binary()),
- pa.field("binary", pa.binary()),
- pa.field("list", pa.list_(pa.string())),
- ])
+ expected_schema = pa.schema(
+ [
+ pa.field("string", pa.string()),
+ pa.field("string-to-binary", pa.binary()),
+ pa.field("binary", pa.binary()),
+ pa.field("list", pa.list_(pa.string())),
+ ]
+ )
assert result_table.schema.equals(expected_schema)
@@ -917,3 +922,31 @@ def test_table_scan_empty_table(catalog: Catalog) -> None:
result_table = tbl.scan().to_arrow()
assert len(result_table) == 0
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
+def test_read_from_s3_and_local_fs(catalog: Catalog, tmp_path: PosixPath) -> None:
+ identifier = "default.test_read_from_s3_and_local_fs"
+ schema = pa.schema([pa.field("colA", pa.string())])
+ arrow_table = pa.Table.from_arrays([pa.array(["one"])], schema=schema)
+
+ tmp_dir = tmp_path / "data"
+ tmp_dir.mkdir()
+ local_file = tmp_dir / "local_file.parquet"
+
+ try:
+ catalog.drop_table(identifier)
+ except NoSuchTableError:
+ pass
+ tbl = catalog.create_table(identifier, schema=schema)
+
+ # Append table to s3 endpoint
+ tbl.append(arrow_table)
+
+ # Append a local file
+ pq.write_table(arrow_table, local_file)
+ tbl.add_files([str(local_file)])
+
+ result_table = tbl.scan().to_arrow()
+ assert result_table["colA"].to_pylist() == ["one", "one"]
diff --git a/tests/integration/test_rest_schema.py b/tests/integration/test_rest_schema.py
index 8e64142b3f..6a704839e2 100644
--- a/tests/integration/test_rest_schema.py
+++ b/tests/integration/test_rest_schema.py
@@ -685,11 +685,13 @@ def test_rename_simple(simple_table: Table) -> None:
)
# Check that the name mapping gets updated
- assert simple_table.name_mapping() == NameMapping([
- MappedField(field_id=1, names=["foo", "vo"]),
- MappedField(field_id=2, names=["bar", "var"]),
- MappedField(field_id=3, names=["baz"]),
- ])
+ assert simple_table.name_mapping() == NameMapping(
+ [
+ MappedField(field_id=1, names=["foo", "vo"]),
+ MappedField(field_id=2, names=["bar", "var"]),
+ MappedField(field_id=3, names=["baz"]),
+ ]
+ )
@pytest.mark.integration
@@ -719,9 +721,11 @@ def test_rename_simple_nested(catalog: Catalog) -> None:
)
# Check that the name mapping gets updated
- assert tbl.name_mapping() == NameMapping([
- MappedField(field_id=1, names=["foo"], fields=[MappedField(field_id=2, names=["bar", "vo"])]),
- ])
+ assert tbl.name_mapping() == NameMapping(
+ [
+ MappedField(field_id=1, names=["foo"], fields=[MappedField(field_id=2, names=["bar", "vo"])]),
+ ]
+ )
@pytest.mark.integration
diff --git a/tests/integration/test_writes/test_partitioned_writes.py b/tests/integration/test_writes/test_partitioned_writes.py
index 3eb3bd68a8..3c59897b07 100644
--- a/tests/integration/test_writes/test_partitioned_writes.py
+++ b/tests/integration/test_writes/test_partitioned_writes.py
@@ -28,6 +28,7 @@
from pyiceberg.exceptions import NoSuchTableError
from pyiceberg.partitioning import PartitionField, PartitionSpec
from pyiceberg.schema import Schema
+from pyiceberg.table import TableProperties
from pyiceberg.transforms import (
BucketTransform,
DayTransform,
@@ -280,6 +281,46 @@ def test_query_filter_v1_v2_append_null(
assert df.where(f"{col} is null").count() == 2, f"Expected 2 null rows for {col}"
+@pytest.mark.integration
+@pytest.mark.parametrize(
+ "part_col", ["int", "bool", "string", "string_long", "long", "float", "double", "date", "timestamp", "timestamptz", "binary"]
+)
+@pytest.mark.parametrize("format_version", [1, 2])
+def test_object_storage_location_provider_excludes_partition_path(
+ session_catalog: Catalog, spark: SparkSession, arrow_table_with_null: pa.Table, part_col: str, format_version: int
+) -> None:
+ nested_field = TABLE_SCHEMA.find_field(part_col)
+ partition_spec = PartitionSpec(
+ PartitionField(source_id=nested_field.field_id, field_id=1001, transform=IdentityTransform(), name=part_col)
+ )
+
+ # write.object-storage.enabled and write.object-storage.partitioned-paths don't need to be specified as they're on by default
+ assert TableProperties.OBJECT_STORE_ENABLED_DEFAULT
+ assert TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT
+ tbl = _create_table(
+ session_catalog=session_catalog,
+ identifier=f"default.arrow_table_v{format_version}_with_null_partitioned_on_col_{part_col}",
+ properties={"format-version": str(format_version)},
+ data=[arrow_table_with_null],
+ partition_spec=partition_spec,
+ )
+
+ original_paths = tbl.inspect.data_files().to_pydict()["file_path"]
+ assert len(original_paths) == 3
+
+ # Update props to exclude partitioned paths and append data
+ with tbl.transaction() as tx:
+ tx.set_properties({TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS: False})
+ tbl.append(arrow_table_with_null)
+
+ added_paths = set(tbl.inspect.data_files().to_pydict()["file_path"]) - set(original_paths)
+ assert len(added_paths) == 3
+
+ # All paths before the props update should contain the partition, while all paths after should not
+ assert all(f"{part_col}=" in path for path in original_paths)
+ assert all(f"{part_col}=" not in path for path in added_paths)
+
+
@pytest.mark.integration
@pytest.mark.parametrize(
"spec",
@@ -395,7 +436,7 @@ def test_dynamic_partition_overwrite_unpartitioned_evolve_to_identity_transform(
# For a long string, the lower bound and upper bound is truncated
# e.g. aaaaaaaaaaaaaaaaaaaaaa has lower bound of aaaaaaaaaaaaaaaa and upper bound of aaaaaaaaaaaaaaab
# this makes strict metric evaluator determine the file evaluate as ROWS_MIGHT_NOT_MATCH
- # this further causes the partitioned data file to be overwriten rather than deleted
+ # this further causes the partitioned data file to be overwritten rather than deleted
if part_col == "string_long":
expected_operations = ["append", "append", "overwrite", "append"]
assert tbl.inspect.snapshots().to_pydict()["operation"] == expected_operations
@@ -539,7 +580,7 @@ def test_data_files_with_table_partitioned_with_null(
# the first snapshot generates M3 with 6 delete data entries collected from M1 and M2.
# ML3 = [M3]
#
- # The second snapshot generates M4 with 3 appended data entries and since M3 (previous manifests) only has delte entries it does not lint to it.
+    # The second snapshot generates M4 with 3 appended data entries and since M3 (previous manifests) only has delete entries, it does not link to it.
# ML4 = [M4]
# Append : Append generates M5 with new data entries and links to all previous manifests which is M4 .
@@ -552,7 +593,7 @@ def test_data_files_with_table_partitioned_with_null(
# ML6 = [M6, M7, M8]
#
# The second snapshot generates M9 with 3 appended data entries and it also looks at manifests in ML6 (previous manifests)
- # it ignores M6 since it only has delte entries but it links to M7 and M8.
+ # it ignores M6 since it only has delete entries but it links to M7 and M8.
# ML7 = [M9, M7, M8]
# tldr:
diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py
index f9c0afd3bc..fff48b9373 100644
--- a/tests/integration/test_writes/test_writes.py
+++ b/tests/integration/test_writes/test_writes.py
@@ -285,6 +285,33 @@ def test_data_files(spark: SparkSession, session_catalog: Catalog, arrow_table_w
assert [row.deleted_data_files_count for row in rows] == [0, 1, 0, 0, 0]
+@pytest.mark.integration
+@pytest.mark.parametrize("format_version", [1, 2])
+def test_object_storage_data_files(
+ spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int
+) -> None:
+ tbl = _create_table(
+ session_catalog=session_catalog,
+ identifier="default.object_stored",
+ properties={"format-version": format_version, TableProperties.OBJECT_STORE_ENABLED: True},
+ data=[arrow_table_with_null],
+ )
+ tbl.append(arrow_table_with_null)
+
+ paths = tbl.inspect.data_files().to_pydict()["file_path"]
+ assert len(paths) == 2
+
+ for location in paths:
+ assert location.startswith("s3://warehouse/default/object_stored/data/")
+ parts = location.split("/")
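+        # 11 parts: "s3:", "", "warehouse", "default", "object_stored", "data", four entropy dirs, and the file name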
+ assert len(parts) == 11
+
+ # Entropy binary directories should have been injected
+ for dir_name in parts[6:10]:
+ assert dir_name
+ assert all(c in "01" for c in dir_name)
+
+
@pytest.mark.integration
def test_python_writes_with_spark_snapshot_reads(
spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table
@@ -324,20 +351,24 @@ def test_python_writes_special_character_column_with_spark_reads(
{"street": "789", "city": "Random", "zip": 10112, column_name_with_special_character: "c"},
],
}
- pa_schema = pa.schema([
- pa.field(column_name_with_special_character, pa.string()),
- pa.field("id", pa.int32()),
- pa.field("name", pa.string()),
- pa.field(
- "address",
- pa.struct([
- pa.field("street", pa.string()),
- pa.field("city", pa.string()),
- pa.field("zip", pa.int32()),
- pa.field(column_name_with_special_character, pa.string()),
- ]),
- ),
- ])
+ pa_schema = pa.schema(
+ [
+ pa.field(column_name_with_special_character, pa.string()),
+ pa.field("id", pa.int32()),
+ pa.field("name", pa.string()),
+ pa.field(
+ "address",
+ pa.struct(
+ [
+ pa.field("street", pa.string()),
+ pa.field("city", pa.string()),
+ pa.field("zip", pa.int32()),
+ pa.field(column_name_with_special_character, pa.string()),
+ ]
+ ),
+ ),
+ ]
+ )
arrow_table_with_special_character_column = pa.Table.from_pydict(TEST_DATA_WITH_SPECIAL_CHARACTER_COLUMN, schema=pa_schema)
tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema)
@@ -357,10 +388,12 @@ def test_python_writes_dictionary_encoded_column_with_spark_reads(
"id": [1, 2, 3, 1, 1],
"name": ["AB", "CD", "EF", "CD", "EF"],
}
- pa_schema = pa.schema([
- pa.field("id", pa.dictionary(pa.int32(), pa.int32(), False)),
- pa.field("name", pa.dictionary(pa.int32(), pa.string(), False)),
- ])
+ pa_schema = pa.schema(
+ [
+ pa.field("id", pa.dictionary(pa.int32(), pa.int32(), False)),
+ pa.field("name", pa.dictionary(pa.int32(), pa.string(), False)),
+ ]
+ )
arrow_table = pa.Table.from_pydict(TEST_DATA, schema=pa_schema)
tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema)
@@ -387,20 +420,24 @@ def test_python_writes_with_small_and_large_types_spark_reads(
{"street": "789", "city": "Random", "zip": 10112, "bar": "c"},
],
}
- pa_schema = pa.schema([
- pa.field("foo", pa.large_string()),
- pa.field("id", pa.int32()),
- pa.field("name", pa.string()),
- pa.field(
- "address",
- pa.struct([
- pa.field("street", pa.string()),
- pa.field("city", pa.string()),
- pa.field("zip", pa.int32()),
- pa.field("bar", pa.large_string()),
- ]),
- ),
- ])
+ pa_schema = pa.schema(
+ [
+ pa.field("foo", pa.large_string()),
+ pa.field("id", pa.int32()),
+ pa.field("name", pa.string()),
+ pa.field(
+ "address",
+ pa.struct(
+ [
+ pa.field("street", pa.string()),
+ pa.field("city", pa.string()),
+ pa.field("zip", pa.int32()),
+ pa.field("bar", pa.large_string()),
+ ]
+ ),
+ ),
+ ]
+ )
arrow_table = pa.Table.from_pydict(TEST_DATA, schema=pa_schema)
tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema)
@@ -409,20 +446,24 @@ def test_python_writes_with_small_and_large_types_spark_reads(
pyiceberg_df = tbl.scan().to_pandas()
assert spark_df.equals(pyiceberg_df)
arrow_table_on_read = tbl.scan().to_arrow()
- assert arrow_table_on_read.schema == pa.schema([
- pa.field("foo", pa.large_string()),
- pa.field("id", pa.int32()),
- pa.field("name", pa.large_string()),
- pa.field(
- "address",
- pa.struct([
- pa.field("street", pa.large_string()),
- pa.field("city", pa.large_string()),
- pa.field("zip", pa.int32()),
- pa.field("bar", pa.large_string()),
- ]),
- ),
- ])
+ assert arrow_table_on_read.schema == pa.schema(
+ [
+ pa.field("foo", pa.large_string()),
+ pa.field("id", pa.int32()),
+ pa.field("name", pa.large_string()),
+ pa.field(
+ "address",
+ pa.struct(
+ [
+ pa.field("street", pa.large_string()),
+ pa.field("city", pa.large_string()),
+ pa.field("zip", pa.int32()),
+ pa.field("bar", pa.large_string()),
+ ]
+ ),
+ ),
+ ]
+ )
@pytest.mark.integration
@@ -718,10 +759,12 @@ def test_write_and_evolve(session_catalog: Catalog, format_version: int) -> None
"foo": ["a", None, "z"],
"bar": [19, None, 25],
},
- schema=pa.schema([
- pa.field("foo", pa.string(), nullable=True),
- pa.field("bar", pa.int32(), nullable=True),
- ]),
+ schema=pa.schema(
+ [
+ pa.field("foo", pa.string(), nullable=True),
+ pa.field("bar", pa.int32(), nullable=True),
+ ]
+ ),
)
with tbl.transaction() as txn:
@@ -761,10 +804,12 @@ def test_create_table_transaction(catalog: Catalog, format_version: int) -> None
"foo": ["a", None, "z"],
"bar": [19, None, 25],
},
- schema=pa.schema([
- pa.field("foo", pa.string(), nullable=True),
- pa.field("bar", pa.int32(), nullable=True),
- ]),
+ schema=pa.schema(
+ [
+ pa.field("foo", pa.string(), nullable=True),
+ pa.field("bar", pa.int32(), nullable=True),
+ ]
+ ),
)
with catalog.create_table_transaction(
@@ -810,9 +855,9 @@ def test_create_table_with_non_default_values(catalog: Catalog, table_schema_wit
except NoSuchTableError:
pass
- iceberg_spec = PartitionSpec(*[
- PartitionField(source_id=2, field_id=1001, transform=IdentityTransform(), name="integer_partition")
- ])
+ iceberg_spec = PartitionSpec(
+ *[PartitionField(source_id=2, field_id=1001, transform=IdentityTransform(), name="integer_partition")]
+ )
sort_order = SortOrder(*[SortField(source_id=2, transform=IdentityTransform(), direction=SortDirection.ASC)])
@@ -1071,9 +1116,11 @@ def test_table_write_schema_with_valid_nullability_diff(
table_schema = Schema(
NestedField(field_id=1, name="long", field_type=LongType(), required=False),
)
- other_schema = pa.schema((
- pa.field("long", pa.int64(), nullable=False), # can support writing required pyarrow field to optional Iceberg field
- ))
+ other_schema = pa.schema(
+ (
+ pa.field("long", pa.int64(), nullable=False), # can support writing required pyarrow field to optional Iceberg field
+ )
+ )
arrow_table = pa.Table.from_pydict(
{
"long": [1, 9],
@@ -1114,13 +1161,15 @@ def test_table_write_schema_with_valid_upcast(
# table's long field should cast to long on read
written_arrow_table = tbl.scan().to_arrow()
assert written_arrow_table == pyarrow_table_with_promoted_types.cast(
- pa.schema((
- pa.field("long", pa.int64(), nullable=True),
- pa.field("list", pa.large_list(pa.int64()), nullable=False),
- pa.field("map", pa.map_(pa.large_string(), pa.int64()), nullable=False),
- pa.field("double", pa.float64(), nullable=True), # can support upcasting float to double
- pa.field("uuid", pa.binary(length=16), nullable=True), # can UUID is read as fixed length binary of length 16
- ))
+ pa.schema(
+ (
+ pa.field("long", pa.int64(), nullable=True),
+ pa.field("list", pa.large_list(pa.int64()), nullable=False),
+ pa.field("map", pa.map_(pa.large_string(), pa.int64()), nullable=False),
+ pa.field("double", pa.float64(), nullable=True), # can support upcasting float to double
+                pa.field("uuid", pa.binary(length=16), nullable=True),  # UUID is read as fixed-length binary of length 16
+ )
+ )
)
lhs = spark.table(f"{identifier}").toPandas()
rhs = written_arrow_table.to_pandas()
@@ -1510,16 +1559,20 @@ def test_rewrite_manifest_after_partition_evolution(session_catalog: Catalog) ->
def test_writing_null_structs(session_catalog: Catalog) -> None:
import pyarrow as pa
- schema = pa.schema([
- pa.field(
- "struct_field_1",
- pa.struct([
- pa.field("string_nested_1", pa.string()),
- pa.field("int_item_2", pa.int32()),
- pa.field("float_item_2", pa.float32()),
- ]),
- ),
- ])
+ schema = pa.schema(
+ [
+ pa.field(
+ "struct_field_1",
+ pa.struct(
+ [
+ pa.field("string_nested_1", pa.string()),
+ pa.field("int_item_2", pa.int32()),
+ pa.field("float_item_2", pa.float32()),
+ ]
+ ),
+ ),
+ ]
+ )
records = [
{
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
index e4017e1df5..8beb750f49 100644
--- a/tests/io/test_pyarrow.py
+++ b/tests/io/test_pyarrow.py
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
# pylint: disable=protected-access,unused-argument,redefined-outer-name
-
+import logging
import os
import tempfile
import uuid
@@ -27,7 +27,7 @@
import pyarrow as pa
import pyarrow.parquet as pq
import pytest
-from pyarrow.fs import FileType, LocalFileSystem
+from pyarrow.fs import FileType, LocalFileSystem, S3FileSystem
from pyiceberg.exceptions import ResolveError
from pyiceberg.expressions import (
@@ -360,10 +360,12 @@ def test_pyarrow_s3_session_properties() -> None:
**UNIFIED_AWS_SESSION_PROPERTIES,
}
- with patch("pyarrow.fs.S3FileSystem") as mock_s3fs:
+ with patch("pyarrow.fs.S3FileSystem") as mock_s3fs, patch("pyarrow.fs.resolve_s3_region") as mock_s3_region_resolver:
s3_fileio = PyArrowFileIO(properties=session_properties)
filename = str(uuid.uuid4())
+        # Mock `resolve_s3_region` to prevent the location used from resolving to a different s3 region
+ mock_s3_region_resolver.side_effect = OSError("S3 bucket is not found")
s3_fileio.new_input(location=f"s3://warehouse/{filename}")
mock_s3fs.assert_called_with(
@@ -381,10 +383,11 @@ def test_pyarrow_unified_session_properties() -> None:
**UNIFIED_AWS_SESSION_PROPERTIES,
}
- with patch("pyarrow.fs.S3FileSystem") as mock_s3fs:
+ with patch("pyarrow.fs.S3FileSystem") as mock_s3fs, patch("pyarrow.fs.resolve_s3_region") as mock_s3_region_resolver:
s3_fileio = PyArrowFileIO(properties=session_properties)
filename = str(uuid.uuid4())
+ mock_s3_region_resolver.return_value = "client.region"
s3_fileio.new_input(location=f"s3://warehouse/{filename}")
mock_s3fs.assert_called_with(
@@ -547,11 +550,13 @@ def test_binary_type_to_pyarrow() -> None:
def test_struct_type_to_pyarrow(table_schema_simple: Schema) -> None:
- expected = pa.struct([
- pa.field("foo", pa.large_string(), nullable=True, metadata={"field_id": "1"}),
- pa.field("bar", pa.int32(), nullable=False, metadata={"field_id": "2"}),
- pa.field("baz", pa.bool_(), nullable=True, metadata={"field_id": "3"}),
- ])
+ expected = pa.struct(
+ [
+ pa.field("foo", pa.large_string(), nullable=True, metadata={"field_id": "1"}),
+ pa.field("bar", pa.int32(), nullable=False, metadata={"field_id": "2"}),
+ pa.field("baz", pa.bool_(), nullable=True, metadata={"field_id": "3"}),
+ ]
+ )
assert visit(table_schema_simple.as_struct(), _ConvertToArrowSchema()) == expected
@@ -1771,11 +1776,13 @@ def test_bin_pack_arrow_table(arrow_table_with_null: pa.Table) -> None:
def test_schema_mismatch_type(table_schema_simple: Schema) -> None:
- other_schema = pa.schema((
- pa.field("foo", pa.string(), nullable=True),
- pa.field("bar", pa.decimal128(18, 6), nullable=False),
- pa.field("baz", pa.bool_(), nullable=True),
- ))
+ other_schema = pa.schema(
+ (
+ pa.field("foo", pa.string(), nullable=True),
+ pa.field("bar", pa.decimal128(18, 6), nullable=False),
+ pa.field("baz", pa.bool_(), nullable=True),
+ )
+ )
expected = r"""Mismatch in fields:
ββββββ³βββββββββββββββββββββββββββ³ββββββββββββββββββββββββββββββββββ
@@ -1792,11 +1799,13 @@ def test_schema_mismatch_type(table_schema_simple: Schema) -> None:
def test_schema_mismatch_nullability(table_schema_simple: Schema) -> None:
- other_schema = pa.schema((
- pa.field("foo", pa.string(), nullable=True),
- pa.field("bar", pa.int32(), nullable=True),
- pa.field("baz", pa.bool_(), nullable=True),
- ))
+ other_schema = pa.schema(
+ (
+ pa.field("foo", pa.string(), nullable=True),
+ pa.field("bar", pa.int32(), nullable=True),
+ pa.field("baz", pa.bool_(), nullable=True),
+ )
+ )
expected = """Mismatch in fields:
ββββββ³βββββββββββββββββββββββββββ³βββββββββββββββββββββββββββ
@@ -1813,11 +1822,13 @@ def test_schema_mismatch_nullability(table_schema_simple: Schema) -> None:
def test_schema_compatible_nullability_diff(table_schema_simple: Schema) -> None:
- other_schema = pa.schema((
- pa.field("foo", pa.string(), nullable=True),
- pa.field("bar", pa.int32(), nullable=False),
- pa.field("baz", pa.bool_(), nullable=False),
- ))
+ other_schema = pa.schema(
+ (
+ pa.field("foo", pa.string(), nullable=True),
+ pa.field("bar", pa.int32(), nullable=False),
+ pa.field("baz", pa.bool_(), nullable=False),
+ )
+ )
try:
_check_pyarrow_schema_compatible(table_schema_simple, other_schema)
@@ -1826,10 +1837,12 @@ def test_schema_compatible_nullability_diff(table_schema_simple: Schema) -> None
def test_schema_mismatch_missing_field(table_schema_simple: Schema) -> None:
- other_schema = pa.schema((
- pa.field("foo", pa.string(), nullable=True),
- pa.field("baz", pa.bool_(), nullable=True),
- ))
+ other_schema = pa.schema(
+ (
+ pa.field("foo", pa.string(), nullable=True),
+ pa.field("baz", pa.bool_(), nullable=True),
+ )
+ )
expected = """Mismatch in fields:
ββββββ³βββββββββββββββββββββββββββ³βββββββββββββββββββββββββββ
@@ -1851,9 +1864,11 @@ def test_schema_compatible_missing_nullable_field_nested(table_schema_nested: Sc
6,
pa.field(
"person",
- pa.struct([
- pa.field("age", pa.int32(), nullable=False),
- ]),
+ pa.struct(
+ [
+ pa.field("age", pa.int32(), nullable=False),
+ ]
+ ),
nullable=True,
),
)
@@ -1869,9 +1884,11 @@ def test_schema_mismatch_missing_required_field_nested(table_schema_nested: Sche
6,
pa.field(
"person",
- pa.struct([
- pa.field("name", pa.string(), nullable=True),
- ]),
+ pa.struct(
+ [
+ pa.field("name", pa.string(), nullable=True),
+ ]
+ ),
nullable=True,
),
)
@@ -1920,12 +1937,14 @@ def test_schema_compatible_nested(table_schema_nested: Schema) -> None:
def test_schema_mismatch_additional_field(table_schema_simple: Schema) -> None:
- other_schema = pa.schema((
- pa.field("foo", pa.string(), nullable=True),
- pa.field("bar", pa.int32(), nullable=False),
- pa.field("baz", pa.bool_(), nullable=True),
- pa.field("new_field", pa.date32(), nullable=True),
- ))
+ other_schema = pa.schema(
+ (
+ pa.field("foo", pa.string(), nullable=True),
+ pa.field("bar", pa.int32(), nullable=False),
+ pa.field("baz", pa.bool_(), nullable=True),
+ pa.field("new_field", pa.date32(), nullable=True),
+ )
+ )
with pytest.raises(
ValueError, match=r"PyArrow table contains more columns: new_field. Update the schema first \(hint, use union_by_name\)."
@@ -1942,10 +1961,12 @@ def test_schema_compatible(table_schema_simple: Schema) -> None:
def test_schema_projection(table_schema_simple: Schema) -> None:
# remove optional `baz` field from `table_schema_simple`
- other_schema = pa.schema((
- pa.field("foo", pa.string(), nullable=True),
- pa.field("bar", pa.int32(), nullable=False),
- ))
+ other_schema = pa.schema(
+ (
+ pa.field("foo", pa.string(), nullable=True),
+ pa.field("bar", pa.int32(), nullable=False),
+ )
+ )
try:
_check_pyarrow_schema_compatible(table_schema_simple, other_schema)
except Exception:
@@ -1954,11 +1975,13 @@ def test_schema_projection(table_schema_simple: Schema) -> None:
def test_schema_downcast(table_schema_simple: Schema) -> None:
# large_string type is compatible with string type
- other_schema = pa.schema((
- pa.field("foo", pa.large_string(), nullable=True),
- pa.field("bar", pa.int32(), nullable=False),
- pa.field("baz", pa.bool_(), nullable=True),
- ))
+ other_schema = pa.schema(
+ (
+ pa.field("foo", pa.large_string(), nullable=True),
+ pa.field("bar", pa.int32(), nullable=False),
+ pa.field("baz", pa.bool_(), nullable=True),
+ )
+ )
try:
_check_pyarrow_schema_compatible(table_schema_simple, other_schema)
@@ -2037,11 +2060,13 @@ def test_identity_partition_on_multi_columns() -> None:
assert {table_partition.partition_key.partition for table_partition in result} == expected
concatenated_arrow_table = pa.concat_tables([table_partition.arrow_table_partition for table_partition in result])
assert concatenated_arrow_table.num_rows == arrow_table.num_rows
- assert concatenated_arrow_table.sort_by([
- ("born_year", "ascending"),
- ("n_legs", "ascending"),
- ("animal", "ascending"),
- ]) == arrow_table.sort_by([("born_year", "ascending"), ("n_legs", "ascending"), ("animal", "ascending")])
+ assert concatenated_arrow_table.sort_by(
+ [
+ ("born_year", "ascending"),
+ ("n_legs", "ascending"),
+ ("animal", "ascending"),
+ ]
+ ) == arrow_table.sort_by([("born_year", "ascending"), ("n_legs", "ascending"), ("animal", "ascending")])
def test__to_requested_schema_timestamps(
@@ -2074,3 +2099,88 @@ def test__to_requested_schema_timestamps_without_downcast_raises_exception(
_to_requested_schema(requested_schema, file_schema, batch, downcast_ns_timestamp_to_us=False, include_field_ids=False)
assert "Unsupported schema projection from timestamp[ns] to timestamp[us]" in str(exc_info.value)
+
+
+def test_pyarrow_file_io_fs_by_scheme_cache() -> None:
+ # It's better to set up multi-region minio servers for an integration test once `endpoint_url` argument becomes available for `resolve_s3_region`
+ # Refer to: https://github.com/apache/arrow/issues/43713
+
+ pyarrow_file_io = PyArrowFileIO()
+ us_east_1_region = "us-east-1"
+ ap_southeast_2_region = "ap-southeast-2"
+
+ with patch("pyarrow.fs.resolve_s3_region") as mock_s3_region_resolver:
+ # Call with new argument resolves region automatically
+ mock_s3_region_resolver.return_value = us_east_1_region
+ filesystem_us = pyarrow_file_io.fs_by_scheme("s3", "us-east-1-bucket")
+ assert filesystem_us.region == us_east_1_region
+ assert pyarrow_file_io.fs_by_scheme.cache_info().misses == 1 # type: ignore
+ assert pyarrow_file_io.fs_by_scheme.cache_info().currsize == 1 # type: ignore
+
+ # Call with different argument also resolves region automatically
+ mock_s3_region_resolver.return_value = ap_southeast_2_region
+ filesystem_ap_southeast_2 = pyarrow_file_io.fs_by_scheme("s3", "ap-southeast-2-bucket")
+ assert filesystem_ap_southeast_2.region == ap_southeast_2_region
+ assert pyarrow_file_io.fs_by_scheme.cache_info().misses == 2 # type: ignore
+ assert pyarrow_file_io.fs_by_scheme.cache_info().currsize == 2 # type: ignore
+
+ # Call with same argument hits cache
+ filesystem_us_cached = pyarrow_file_io.fs_by_scheme("s3", "us-east-1-bucket")
+ assert filesystem_us_cached.region == us_east_1_region
+ assert pyarrow_file_io.fs_by_scheme.cache_info().hits == 1 # type: ignore
+
+ # Call with same argument hits cache
+ filesystem_ap_southeast_2_cached = pyarrow_file_io.fs_by_scheme("s3", "ap-southeast-2-bucket")
+ assert filesystem_ap_southeast_2_cached.region == ap_southeast_2_region
+ assert pyarrow_file_io.fs_by_scheme.cache_info().hits == 2 # type: ignore
+
+
+def test_pyarrow_io_new_input_multi_region(caplog: Any) -> None:
+ # It's better to set up multi-region minio servers for an integration test once `endpoint_url` argument becomes available for `resolve_s3_region`
+ # Refer to: https://github.com/apache/arrow/issues/43713
+ user_provided_region = "ap-southeast-1"
+ bucket_regions = [
+ ("us-east-2-bucket", "us-east-2"),
+ ("ap-southeast-2-bucket", "ap-southeast-2"),
+ ]
+
+ def _s3_region_map(bucket: str) -> str:
+ for bucket_region in bucket_regions:
+ if bucket_region[0] == bucket:
+ return bucket_region[1]
+ raise OSError("Unknown bucket")
+
+ # For a pyarrow io instance with configured default s3 region
+ pyarrow_file_io = PyArrowFileIO({"s3.region": user_provided_region})
+ with patch("pyarrow.fs.resolve_s3_region") as mock_s3_region_resolver:
+ mock_s3_region_resolver.side_effect = _s3_region_map
+
+        # The region is set to the provided region if the bucket region cannot be resolved
+ with caplog.at_level(logging.WARNING):
+ assert pyarrow_file_io.new_input("s3://non-exist-bucket/path/to/file")._filesystem.region == user_provided_region
+ assert f"Unable to resolve region for bucket non-exist-bucket, using default region {user_provided_region}" in caplog.text
+
+ for bucket_region in bucket_regions:
+ # For s3 scheme, region is overwritten by resolved bucket region if different from user provided region
+ with caplog.at_level(logging.WARNING):
+ assert pyarrow_file_io.new_input(f"s3://{bucket_region[0]}/path/to/file")._filesystem.region == bucket_region[1]
+ assert (
+ f"PyArrow FileIO overriding S3 bucket region for bucket {bucket_region[0]}: "
+ f"provided region {user_provided_region}, actual region {bucket_region[1]}" in caplog.text
+ )
+
+ # For oss scheme, user provided region is used instead
+ assert pyarrow_file_io.new_input(f"oss://{bucket_region[0]}/path/to/file")._filesystem.region == user_provided_region
+
+
+def test_pyarrow_io_multi_fs() -> None:
+ pyarrow_file_io = PyArrowFileIO({"s3.region": "ap-southeast-1"})
+
+ with patch("pyarrow.fs.resolve_s3_region") as mock_s3_region_resolver:
+ mock_s3_region_resolver.return_value = None
+
+ # The PyArrowFileIO instance resolves s3 file input to S3FileSystem
+ assert isinstance(pyarrow_file_io.new_input("s3://bucket/path/to/file")._filesystem, S3FileSystem)
+
+ # Same PyArrowFileIO instance resolves local file input to LocalFileSystem
+ assert isinstance(pyarrow_file_io.new_input("file:///path/to/file")._filesystem, LocalFileSystem)
diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py
index 9e6df720c6..027fccae7c 100644
--- a/tests/io/test_pyarrow_visitor.py
+++ b/tests/io/test_pyarrow_visitor.py
@@ -239,11 +239,13 @@ def test_pyarrow_variable_binary_to_iceberg() -> None:
def test_pyarrow_struct_to_iceberg() -> None:
- pyarrow_struct = pa.struct([
- pa.field("foo", pa.string(), nullable=True, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}),
- pa.field("bar", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "2"}),
- pa.field("baz", pa.bool_(), nullable=True, metadata={"PARQUET:field_id": "3"}),
- ])
+ pyarrow_struct = pa.struct(
+ [
+ pa.field("foo", pa.string(), nullable=True, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}),
+ pa.field("bar", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "2"}),
+ pa.field("baz", pa.bool_(), nullable=True, metadata={"PARQUET:field_id": "3"}),
+ ]
+ )
expected = StructType(
NestedField(field_id=1, name="foo", field_type=StringType(), required=False, doc="foo doc"),
NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True),
@@ -344,84 +346,94 @@ def test_round_schema_large_string() -> None:
def test_simple_schema_has_missing_ids() -> None:
- schema = pa.schema([
- pa.field("foo", pa.string(), nullable=False),
- ])
+ schema = pa.schema(
+ [
+ pa.field("foo", pa.string(), nullable=False),
+ ]
+ )
visitor = _HasIds()
has_ids = visit_pyarrow(schema, visitor)
assert not has_ids
def test_simple_schema_has_missing_ids_partial() -> None:
- schema = pa.schema([
- pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}),
- pa.field("bar", pa.int32(), nullable=False),
- ])
+ schema = pa.schema(
+ [
+ pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}),
+ pa.field("bar", pa.int32(), nullable=False),
+ ]
+ )
visitor = _HasIds()
has_ids = visit_pyarrow(schema, visitor)
assert not has_ids
def test_nested_schema_has_missing_ids() -> None:
- schema = pa.schema([
- pa.field("foo", pa.string(), nullable=False),
- pa.field(
- "quux",
- pa.map_(
- pa.string(),
- pa.map_(pa.string(), pa.int32()),
+ schema = pa.schema(
+ [
+ pa.field("foo", pa.string(), nullable=False),
+ pa.field(
+ "quux",
+ pa.map_(
+ pa.string(),
+ pa.map_(pa.string(), pa.int32()),
+ ),
+ nullable=False,
),
- nullable=False,
- ),
- ])
+ ]
+ )
visitor = _HasIds()
has_ids = visit_pyarrow(schema, visitor)
assert not has_ids
def test_nested_schema_has_ids() -> None:
- schema = pa.schema([
- pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}),
- pa.field(
- "quux",
- pa.map_(
- pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "7"}),
- pa.field(
- "value",
- pa.map_(
- pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "9"}),
- pa.field("value", pa.int32(), metadata={"PARQUET:field_id": "10"}),
+ schema = pa.schema(
+ [
+ pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}),
+ pa.field(
+ "quux",
+ pa.map_(
+ pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "7"}),
+ pa.field(
+ "value",
+ pa.map_(
+ pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "9"}),
+ pa.field("value", pa.int32(), metadata={"PARQUET:field_id": "10"}),
+ ),
+ nullable=False,
+ metadata={"PARQUET:field_id": "8"},
),
- nullable=False,
- metadata={"PARQUET:field_id": "8"},
),
+ nullable=False,
+ metadata={"PARQUET:field_id": "6", "doc": "quux doc"},
),
- nullable=False,
- metadata={"PARQUET:field_id": "6", "doc": "quux doc"},
- ),
- ])
+ ]
+ )
visitor = _HasIds()
has_ids = visit_pyarrow(schema, visitor)
assert has_ids
def test_nested_schema_has_partial_missing_ids() -> None:
- schema = pa.schema([
- pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}),
- pa.field(
- "quux",
- pa.map_(
- pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "7"}),
- pa.field(
- "value",
- pa.map_(pa.field("key", pa.string(), nullable=False), pa.field("value", pa.int32())),
- nullable=False,
+ schema = pa.schema(
+ [
+ pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}),
+ pa.field(
+ "quux",
+ pa.map_(
+ pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "7"}),
+ pa.field(
+ "value",
+ pa.map_(pa.field("key", pa.string(), nullable=False), pa.field("value", pa.int32())),
+ nullable=False,
+ ),
),
+ nullable=False,
+ metadata={"PARQUET:field_id": "6", "doc": "quux doc"},
),
- nullable=False,
- metadata={"PARQUET:field_id": "6", "doc": "quux doc"},
- ),
- ])
+ ]
+ )
visitor = _HasIds()
has_ids = visit_pyarrow(schema, visitor)
assert not has_ids
@@ -441,11 +453,13 @@ def test_simple_pyarrow_schema_to_schema_missing_ids_using_name_mapping(
pyarrow_schema_simple_without_ids: pa.Schema, iceberg_schema_simple: Schema
) -> None:
schema = pyarrow_schema_simple_without_ids
- name_mapping = NameMapping([
- MappedField(field_id=1, names=["foo"]),
- MappedField(field_id=2, names=["bar"]),
- MappedField(field_id=3, names=["baz"]),
- ])
+ name_mapping = NameMapping(
+ [
+ MappedField(field_id=1, names=["foo"]),
+ MappedField(field_id=2, names=["bar"]),
+ MappedField(field_id=3, names=["baz"]),
+ ]
+ )
assert pyarrow_to_schema(schema, name_mapping) == iceberg_schema_simple
@@ -454,9 +468,11 @@ def test_simple_pyarrow_schema_to_schema_missing_ids_using_name_mapping_partial_
pyarrow_schema_simple_without_ids: pa.Schema,
) -> None:
schema = pyarrow_schema_simple_without_ids
- name_mapping = NameMapping([
- MappedField(field_id=1, names=["foo"]),
- ])
+ name_mapping = NameMapping(
+ [
+ MappedField(field_id=1, names=["foo"]),
+ ]
+ )
with pytest.raises(ValueError) as exc_info:
_ = pyarrow_to_schema(schema, name_mapping)
assert "Could not find field with name: bar" in str(exc_info.value)
@@ -467,83 +483,89 @@ def test_nested_pyarrow_schema_to_schema_missing_ids_using_name_mapping(
) -> None:
schema = pyarrow_schema_nested_without_ids
- name_mapping = NameMapping([
- MappedField(field_id=1, names=["foo"]),
- MappedField(field_id=2, names=["bar"]),
- MappedField(field_id=3, names=["baz"]),
- MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]),
- MappedField(
- field_id=6,
- names=["quux"],
- fields=[
- MappedField(field_id=7, names=["key"]),
- MappedField(
- field_id=8,
- names=["value"],
- fields=[
- MappedField(field_id=9, names=["key"]),
- MappedField(field_id=10, names=["value"]),
- ],
- ),
- ],
- ),
- MappedField(
- field_id=11,
- names=["location"],
- fields=[
- MappedField(
- field_id=12,
- names=["element"],
- fields=[
- MappedField(field_id=13, names=["latitude"]),
- MappedField(field_id=14, names=["longitude"]),
- ],
- )
- ],
- ),
- MappedField(
- field_id=15,
- names=["person"],
- fields=[
- MappedField(field_id=16, names=["name"]),
- MappedField(field_id=17, names=["age"]),
- ],
- ),
- ])
+ name_mapping = NameMapping(
+ [
+ MappedField(field_id=1, names=["foo"]),
+ MappedField(field_id=2, names=["bar"]),
+ MappedField(field_id=3, names=["baz"]),
+ MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]),
+ MappedField(
+ field_id=6,
+ names=["quux"],
+ fields=[
+ MappedField(field_id=7, names=["key"]),
+ MappedField(
+ field_id=8,
+ names=["value"],
+ fields=[
+ MappedField(field_id=9, names=["key"]),
+ MappedField(field_id=10, names=["value"]),
+ ],
+ ),
+ ],
+ ),
+ MappedField(
+ field_id=11,
+ names=["location"],
+ fields=[
+ MappedField(
+ field_id=12,
+ names=["element"],
+ fields=[
+ MappedField(field_id=13, names=["latitude"]),
+ MappedField(field_id=14, names=["longitude"]),
+ ],
+ )
+ ],
+ ),
+ MappedField(
+ field_id=15,
+ names=["person"],
+ fields=[
+ MappedField(field_id=16, names=["name"]),
+ MappedField(field_id=17, names=["age"]),
+ ],
+ ),
+ ]
+ )
assert pyarrow_to_schema(schema, name_mapping) == iceberg_schema_nested
def test_pyarrow_schema_to_schema_missing_ids_using_name_mapping_nested_missing_id() -> None:
- schema = pa.schema([
- pa.field("foo", pa.string(), nullable=False),
- pa.field(
- "quux",
- pa.map_(
- pa.string(),
- pa.map_(pa.string(), pa.int32()),
- ),
- nullable=False,
- ),
- ])
-
- name_mapping = NameMapping([
- MappedField(field_id=1, names=["foo"]),
- MappedField(
- field_id=6,
- names=["quux"],
- fields=[
- MappedField(field_id=7, names=["key"]),
- MappedField(
- field_id=8,
- names=["value"],
- fields=[
- MappedField(field_id=10, names=["value"]),
- ],
+ schema = pa.schema(
+ [
+ pa.field("foo", pa.string(), nullable=False),
+ pa.field(
+ "quux",
+ pa.map_(
+ pa.string(),
+ pa.map_(pa.string(), pa.int32()),
),
- ],
- ),
- ])
+ nullable=False,
+ ),
+ ]
+ )
+
+ name_mapping = NameMapping(
+ [
+ MappedField(field_id=1, names=["foo"]),
+ MappedField(
+ field_id=6,
+ names=["quux"],
+ fields=[
+ MappedField(field_id=7, names=["key"]),
+ MappedField(
+ field_id=8,
+ names=["value"],
+ fields=[
+ MappedField(field_id=10, names=["value"]),
+ ],
+ ),
+ ],
+ ),
+ ]
+ )
with pytest.raises(ValueError) as exc_info:
_ = pyarrow_to_schema(schema, name_mapping)
assert "Could not find field with name: quux.value.key" in str(exc_info.value)
@@ -562,38 +584,44 @@ def test_pyarrow_schema_to_schema_fresh_ids_nested_schema(
def test_pyarrow_schema_ensure_large_types(pyarrow_schema_nested_without_ids: pa.Schema) -> None:
- expected_schema = pa.schema([
- pa.field("foo", pa.large_string(), nullable=False),
- pa.field("bar", pa.int32(), nullable=False),
- pa.field("baz", pa.bool_(), nullable=True),
- pa.field("qux", pa.large_list(pa.large_string()), nullable=False),
- pa.field(
- "quux",
- pa.map_(
- pa.large_string(),
- pa.map_(pa.large_string(), pa.int32()),
+ expected_schema = pa.schema(
+ [
+ pa.field("foo", pa.large_string(), nullable=False),
+ pa.field("bar", pa.int32(), nullable=False),
+ pa.field("baz", pa.bool_(), nullable=True),
+ pa.field("qux", pa.large_list(pa.large_string()), nullable=False),
+ pa.field(
+ "quux",
+ pa.map_(
+ pa.large_string(),
+ pa.map_(pa.large_string(), pa.int32()),
+ ),
+ nullable=False,
),
- nullable=False,
- ),
- pa.field(
- "location",
- pa.large_list(
- pa.struct([
- pa.field("latitude", pa.float32(), nullable=False),
- pa.field("longitude", pa.float32(), nullable=False),
- ]),
+ pa.field(
+ "location",
+ pa.large_list(
+ pa.struct(
+ [
+ pa.field("latitude", pa.float32(), nullable=False),
+ pa.field("longitude", pa.float32(), nullable=False),
+ ]
+ ),
+ ),
+ nullable=False,
),
- nullable=False,
- ),
- pa.field(
- "person",
- pa.struct([
- pa.field("name", pa.large_string(), nullable=True),
- pa.field("age", pa.int32(), nullable=False),
- ]),
- nullable=True,
- ),
- ])
+ pa.field(
+ "person",
+ pa.struct(
+ [
+ pa.field("name", pa.large_string(), nullable=True),
+ pa.field("age", pa.int32(), nullable=False),
+ ]
+ ),
+ nullable=True,
+ ),
+ ]
+ )
assert _pyarrow_schema_ensure_large_types(pyarrow_schema_nested_without_ids) == expected_schema
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index bdc3d030fd..bcb2d643dc 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -527,7 +527,7 @@ def test_update_column(table_v1: Table, table_v2: Table) -> None:
new_schema = table.transaction().update_schema().update_column("y", doc=COMMENT2)._apply()
assert new_schema.find_field("y").doc == COMMENT2, "failed to update existing field doc"
- # update existing doc to an emtpy string
+ # update existing doc to an empty string
assert new_schema.find_field("y").doc == COMMENT2
new_schema2 = table.transaction().update_schema().update_column("y", doc="")._apply()
assert new_schema2.find_field("y").doc == "", "failed to remove existing field doc"
@@ -538,15 +538,15 @@ def test_update_column(table_v1: Table, table_v2: Table) -> None:
assert new_schema3.find_field("z").required is False, "failed to update existing field required"
# assert the above two updates also works with union_by_name
- assert table.update_schema().union_by_name(new_schema)._apply() == new_schema, (
- "failed to update existing field doc with union_by_name"
- )
- assert table.update_schema().union_by_name(new_schema2)._apply() == new_schema2, (
- "failed to remove existing field doc with union_by_name"
- )
- assert table.update_schema().union_by_name(new_schema3)._apply() == new_schema3, (
- "failed to update existing field required with union_by_name"
- )
+ assert (
+ table.update_schema().union_by_name(new_schema)._apply() == new_schema
+ ), "failed to update existing field doc with union_by_name"
+ assert (
+ table.update_schema().union_by_name(new_schema2)._apply() == new_schema2
+ ), "failed to remove existing field doc with union_by_name"
+ assert (
+ table.update_schema().union_by_name(new_schema3)._apply() == new_schema3
+ ), "failed to update existing field required with union_by_name"
def test_add_primitive_type_column(table_v2: Table) -> None:
@@ -1077,52 +1077,56 @@ def test_assert_default_sort_order_id(table_v2: Table) -> None:
def test_correct_schema() -> None:
- table_metadata = TableMetadataV2(**{
- "format-version": 2,
- "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
- "location": "s3://bucket/test/location",
- "last-sequence-number": 34,
- "last-updated-ms": 1602638573590,
- "last-column-id": 3,
- "current-schema-id": 1,
- "schemas": [
- {"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}]},
- {
- "type": "struct",
- "schema-id": 1,
- "identifier-field-ids": [1, 2],
- "fields": [
- {"id": 1, "name": "x", "required": True, "type": "long"},
- {"id": 2, "name": "y", "required": True, "type": "long"},
- {"id": 3, "name": "z", "required": True, "type": "long"},
- ],
- },
- ],
- "default-spec-id": 0,
- "partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}],
- "last-partition-id": 1000,
- "default-sort-order-id": 0,
- "sort-orders": [],
- "current-snapshot-id": 123,
- "snapshots": [
- {
- "snapshot-id": 234,
- "timestamp-ms": 1515100955770,
- "sequence-number": 0,
- "summary": {"operation": "append"},
- "manifest-list": "s3://a/b/1.avro",
- "schema-id": 10,
- },
- {
- "snapshot-id": 123,
- "timestamp-ms": 1515100955770,
- "sequence-number": 0,
- "summary": {"operation": "append"},
- "manifest-list": "s3://a/b/1.avro",
- "schema-id": 0,
- },
- ],
- })
+ table_metadata = TableMetadataV2(
+ **{
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 34,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 3,
+ "current-schema-id": 1,
+ "schemas": [
+ {"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}]},
+ {
+ "type": "struct",
+ "schema-id": 1,
+ "identifier-field-ids": [1, 2],
+ "fields": [
+ {"id": 1, "name": "x", "required": True, "type": "long"},
+ {"id": 2, "name": "y", "required": True, "type": "long"},
+ {"id": 3, "name": "z", "required": True, "type": "long"},
+ ],
+ },
+ ],
+ "default-spec-id": 0,
+ "partition-specs": [
+ {"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}
+ ],
+ "last-partition-id": 1000,
+ "default-sort-order-id": 0,
+ "sort-orders": [],
+ "current-snapshot-id": 123,
+ "snapshots": [
+ {
+ "snapshot-id": 234,
+ "timestamp-ms": 1515100955770,
+ "sequence-number": 0,
+ "summary": {"operation": "append"},
+ "manifest-list": "s3://a/b/1.avro",
+ "schema-id": 10,
+ },
+ {
+ "snapshot-id": 123,
+ "timestamp-ms": 1515100955770,
+ "sequence-number": 0,
+ "summary": {"operation": "append"},
+ "manifest-list": "s3://a/b/1.avro",
+ "schema-id": 0,
+ },
+ ],
+ }
+ )
t = Table(
identifier=("default", "t1"),
diff --git a/tests/table/test_locations.py b/tests/table/test_locations.py
new file mode 100644
index 0000000000..67911b6271
--- /dev/null
+++ b/tests/table/test_locations.py
@@ -0,0 +1,134 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from typing import Optional
+
+import pytest
+
+from pyiceberg.partitioning import PartitionField, PartitionFieldValue, PartitionKey, PartitionSpec
+from pyiceberg.schema import Schema
+from pyiceberg.table.locations import LocationProvider, load_location_provider
+from pyiceberg.transforms import IdentityTransform
+from pyiceberg.typedef import EMPTY_DICT
+from pyiceberg.types import NestedField, StringType
+
+PARTITION_FIELD = PartitionField(source_id=1, field_id=1002, transform=IdentityTransform(), name="string_field")
+PARTITION_KEY = PartitionKey(
+ raw_partition_field_values=[PartitionFieldValue(PARTITION_FIELD, "example_string")],
+ partition_spec=PartitionSpec(PARTITION_FIELD),
+ schema=Schema(NestedField(field_id=1, name="string_field", field_type=StringType(), required=False)),
+)
+
+
+class CustomLocationProvider(LocationProvider):
+ def new_data_location(self, data_file_name: str, partition_key: Optional[PartitionKey] = None) -> str:
+ return f"custom_location_provider/{data_file_name}"
+
+
+def test_simple_location_provider_no_partition() -> None:
+ provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"})
+
+ assert provider.new_data_location("my_file") == "table_location/data/my_file"
+
+
+def test_simple_location_provider_with_partition() -> None:
+ provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"})
+
+ assert provider.new_data_location("my_file", PARTITION_KEY) == "table_location/data/string_field=example_string/my_file"
+
+
+def test_custom_location_provider() -> None:
+ qualified_name = CustomLocationProvider.__module__ + "." + CustomLocationProvider.__name__
+ provider = load_location_provider(
+ table_location="table_location", table_properties={"write.py-location-provider.impl": qualified_name}
+ )
+
+ assert provider.new_data_location("my_file") == "custom_location_provider/my_file"
+
+
+def test_custom_location_provider_single_path() -> None:
+ with pytest.raises(ValueError, match=r"write\.py-location-provider\.impl should be full path"):
+ load_location_provider(table_location="table_location", table_properties={"write.py-location-provider.impl": "not_found"})
+
+
+def test_custom_location_provider_not_found() -> None:
+ with pytest.raises(ValueError, match=r"Could not initialize LocationProvider"):
+ load_location_provider(
+ table_location="table_location", table_properties={"write.py-location-provider.impl": "module.not_found"}
+ )
+
+
+def test_object_storage_no_partition() -> None:
+ provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)
+
+ location = provider.new_data_location("test.parquet")
+ parts = location.split("/")
+
+ assert len(parts) == 7
+ assert parts[0] == "table_location"
+ assert parts[1] == "data"
+ assert parts[-1] == "test.parquet"
+
+ # Entropy directories in the middle
+ for dir_name in parts[2:-1]:
+ assert dir_name
+ assert all(c in "01" for c in dir_name)
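+    # The entropy digits vary with the data file name (see test_hash_injection below for concrete hash values).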
+
+
+def test_object_storage_with_partition() -> None:
+ provider = load_location_provider(
+ table_location="table_location",
+ table_properties={"write.object-storage.enabled": "true"},
+ )
+
+ location = provider.new_data_location("test.parquet", PARTITION_KEY)
+
+    # Both the partition values and the entropy are included in the path. The entropy differs from that in the test
+    # below because the partition key AND the data file name are used as the hash input. This matches Java behaviour;
+    # the hash below is also what the Java implementation produces for this input.
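+    # (In the expected path, the 20 entropy bits are laid out as three 4-bit directories followed by one 8-bit
+    # directory, matching the un-partitioned tests above and below.)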
+ assert location == "table_location/data/0001/0010/1001/00000011/string_field=example_string/test.parquet"
+
+
+# NB: We also test with a None partition key because disabling partitioned paths still replaces the final "/" with "-",
+# even in the paths of un-partitioned files. This matches the behaviour of the Java implementation.
+@pytest.mark.parametrize("partition_key", [PARTITION_KEY, None])
+def test_object_storage_partitioned_paths_disabled(partition_key: Optional[PartitionKey]) -> None:
+ provider = load_location_provider(
+ table_location="table_location",
+ table_properties={
+ "write.object-storage.partitioned-paths": "false",
+ },
+ )
+
+ location = provider.new_data_location("test.parquet", partition_key)
+
+    # No partition values are included in the path, and the last entropy part is joined to the file name with "-"
+ assert location == "table_location/data/0110/1010/0011/11101000-test.parquet"
+
+
+@pytest.mark.parametrize(
+ ["data_file_name", "expected_hash"],
+ [
+ ("a", "0101/0110/1001/10110010"),
+ ("b", "1110/0111/1110/00000011"),
+ ("c", "0010/1101/0110/01011111"),
+ ("d", "1001/0001/0100/01110011"),
+ ],
+)
+def test_hash_injection(data_file_name: str, expected_hash: str) -> None:
+ provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)
+
+ assert provider.new_data_location(data_file_name) == f"table_location/data/{expected_hash}/{data_file_name}"
diff --git a/tests/table/test_name_mapping.py b/tests/table/test_name_mapping.py
index bd271f59f8..c567f3ffb4 100644
--- a/tests/table/test_name_mapping.py
+++ b/tests/table/test_name_mapping.py
@@ -30,49 +30,51 @@
@pytest.fixture(scope="session")
def table_name_mapping_nested() -> NameMapping:
- return NameMapping([
- MappedField(field_id=1, names=["foo"]),
- MappedField(field_id=2, names=["bar"]),
- MappedField(field_id=3, names=["baz"]),
- MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]),
- MappedField(
- field_id=6,
- names=["quux"],
- fields=[
- MappedField(field_id=7, names=["key"]),
- MappedField(
- field_id=8,
- names=["value"],
- fields=[
- MappedField(field_id=9, names=["key"]),
- MappedField(field_id=10, names=["value"]),
- ],
- ),
- ],
- ),
- MappedField(
- field_id=11,
- names=["location"],
- fields=[
- MappedField(
- field_id=12,
- names=["element"],
- fields=[
- MappedField(field_id=13, names=["latitude"]),
- MappedField(field_id=14, names=["longitude"]),
- ],
- )
- ],
- ),
- MappedField(
- field_id=15,
- names=["person"],
- fields=[
- MappedField(field_id=16, names=["name"]),
- MappedField(field_id=17, names=["age"]),
- ],
- ),
- ])
+ return NameMapping(
+ [
+ MappedField(field_id=1, names=["foo"]),
+ MappedField(field_id=2, names=["bar"]),
+ MappedField(field_id=3, names=["baz"]),
+ MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]),
+ MappedField(
+ field_id=6,
+ names=["quux"],
+ fields=[
+ MappedField(field_id=7, names=["key"]),
+ MappedField(
+ field_id=8,
+ names=["value"],
+ fields=[
+ MappedField(field_id=9, names=["key"]),
+ MappedField(field_id=10, names=["value"]),
+ ],
+ ),
+ ],
+ ),
+ MappedField(
+ field_id=11,
+ names=["location"],
+ fields=[
+ MappedField(
+ field_id=12,
+ names=["element"],
+ fields=[
+ MappedField(field_id=13, names=["latitude"]),
+ MappedField(field_id=14, names=["longitude"]),
+ ],
+ )
+ ],
+ ),
+ MappedField(
+ field_id=15,
+ names=["person"],
+ fields=[
+ MappedField(field_id=16, names=["name"]),
+ MappedField(field_id=17, names=["age"]),
+ ],
+ ),
+ ]
+ )
def test_json_mapped_field_deserialization() -> None:
@@ -165,26 +167,30 @@ def test_json_name_mapping_deserialization() -> None:
]
"""
- assert parse_mapping_from_json(name_mapping) == NameMapping([
- MappedField(field_id=1, names=["id", "record_id"]),
- MappedField(field_id=2, names=["data"]),
- MappedField(
- names=["location"],
- field_id=3,
- fields=[
- MappedField(field_id=4, names=["latitude", "lat"]),
- MappedField(field_id=5, names=["longitude", "long"]),
- ],
- ),
- ])
+ assert parse_mapping_from_json(name_mapping) == NameMapping(
+ [
+ MappedField(field_id=1, names=["id", "record_id"]),
+ MappedField(field_id=2, names=["data"]),
+ MappedField(
+ names=["location"],
+ field_id=3,
+ fields=[
+ MappedField(field_id=4, names=["latitude", "lat"]),
+ MappedField(field_id=5, names=["longitude", "long"]),
+ ],
+ ),
+ ]
+ )
def test_json_mapped_field_no_field_id_serialization() -> None:
- table_name_mapping_nested_no_field_id = NameMapping([
- MappedField(field_id=1, names=["foo"]),
- MappedField(field_id=None, names=["bar"]),
- MappedField(field_id=2, names=["qux"], fields=[MappedField(field_id=None, names=["element"])]),
- ])
+ table_name_mapping_nested_no_field_id = NameMapping(
+ [
+ MappedField(field_id=1, names=["foo"]),
+ MappedField(field_id=None, names=["bar"]),
+ MappedField(field_id=2, names=["qux"], fields=[MappedField(field_id=None, names=["element"])]),
+ ]
+ )
assert (
table_name_mapping_nested_no_field_id.model_dump_json()
@@ -200,18 +206,20 @@ def test_json_serialization(table_name_mapping_nested: NameMapping) -> None:
def test_name_mapping_to_string() -> None:
- nm = NameMapping([
- MappedField(field_id=1, names=["id", "record_id"]),
- MappedField(field_id=2, names=["data"]),
- MappedField(
- names=["location"],
- field_id=3,
- fields=[
- MappedField(field_id=4, names=["lat", "latitude"]),
- MappedField(field_id=5, names=["long", "longitude"]),
- ],
- ),
- ])
+ nm = NameMapping(
+ [
+ MappedField(field_id=1, names=["id", "record_id"]),
+ MappedField(field_id=2, names=["data"]),
+ MappedField(
+ names=["location"],
+ field_id=3,
+ fields=[
+ MappedField(field_id=4, names=["lat", "latitude"]),
+ MappedField(field_id=5, names=["long", "longitude"]),
+ ],
+ ),
+ ]
+ )
assert (
str(nm)
@@ -294,51 +302,53 @@ def test_update_mapping(table_name_mapping_nested: NameMapping) -> None:
15: [NestedField(19, "name", StringType(), True), NestedField(20, "add_20", StringType(), True)],
}
- expected = NameMapping([
- MappedField(field_id=1, names=["foo", "foo_update"]),
- MappedField(field_id=2, names=["bar"]),
- MappedField(field_id=3, names=["baz"]),
- MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]),
- MappedField(
- field_id=6,
- names=["quux"],
- fields=[
- MappedField(field_id=7, names=["key"]),
- MappedField(
- field_id=8,
- names=["value"],
- fields=[
- MappedField(field_id=9, names=["key"]),
- MappedField(field_id=10, names=["value"]),
- ],
- ),
- ],
- ),
- MappedField(
- field_id=11,
- names=["location"],
- fields=[
- MappedField(
- field_id=12,
- names=["element"],
- fields=[
- MappedField(field_id=13, names=["latitude"]),
- MappedField(field_id=14, names=["longitude"]),
- ],
- )
- ],
- ),
- MappedField(
- field_id=15,
- names=["person"],
- fields=[
- MappedField(field_id=17, names=["age"]),
- MappedField(field_id=19, names=["name"]),
- MappedField(field_id=20, names=["add_20"]),
- ],
- ),
- MappedField(field_id=18, names=["add_18"]),
- ])
+ expected = NameMapping(
+ [
+ MappedField(field_id=1, names=["foo", "foo_update"]),
+ MappedField(field_id=2, names=["bar"]),
+ MappedField(field_id=3, names=["baz"]),
+ MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]),
+ MappedField(
+ field_id=6,
+ names=["quux"],
+ fields=[
+ MappedField(field_id=7, names=["key"]),
+ MappedField(
+ field_id=8,
+ names=["value"],
+ fields=[
+ MappedField(field_id=9, names=["key"]),
+ MappedField(field_id=10, names=["value"]),
+ ],
+ ),
+ ],
+ ),
+ MappedField(
+ field_id=11,
+ names=["location"],
+ fields=[
+ MappedField(
+ field_id=12,
+ names=["element"],
+ fields=[
+ MappedField(field_id=13, names=["latitude"]),
+ MappedField(field_id=14, names=["longitude"]),
+ ],
+ )
+ ],
+ ),
+ MappedField(
+ field_id=15,
+ names=["person"],
+ fields=[
+ MappedField(field_id=17, names=["age"]),
+ MappedField(field_id=19, names=["name"]),
+ MappedField(field_id=20, names=["add_20"]),
+ ],
+ ),
+ MappedField(field_id=18, names=["add_18"]),
+ ]
+ )
assert update_mapping(table_name_mapping_nested, updates, adds) == expected
diff --git a/tests/table/test_partitioning.py b/tests/table/test_partitioning.py
index d7425bc351..127d57a798 100644
--- a/tests/table/test_partitioning.py
+++ b/tests/table/test_partitioning.py
@@ -16,7 +16,8 @@
# under the License.
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionField, PartitionSpec
from pyiceberg.schema import Schema
-from pyiceberg.transforms import BucketTransform, TruncateTransform
+from pyiceberg.transforms import BucketTransform, IdentityTransform, TruncateTransform
+from pyiceberg.typedef import Record
from pyiceberg.types import (
IntegerType,
NestedField,
@@ -118,6 +119,27 @@ def test_deserialize_partition_spec() -> None:
)
+def test_partition_spec_to_path() -> None:
+ schema = Schema(
+ NestedField(field_id=1, name="str", field_type=StringType(), required=False),
+ NestedField(field_id=2, name="other_str", field_type=StringType(), required=False),
+ NestedField(field_id=3, name="int", field_type=IntegerType(), required=True),
+ )
+
+ spec = PartitionSpec(
+ PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=19), name="my#str%bucket"),
+ PartitionField(source_id=2, field_id=1001, transform=IdentityTransform(), name="other str+bucket"),
+ PartitionField(source_id=3, field_id=1002, transform=BucketTransform(num_buckets=25), name="my!int:bucket"),
+ spec_id=3,
+ )
+
+ record = Record(**{"my#str%bucket": "my+str", "other str+bucket": "( )", "my!int:bucket": 10}) # type: ignore
+
+    # Both partition field names and values should be URL-encoded, with spaces mapping to plus signs, to match the Java
+ # behaviour: https://github.com/apache/iceberg/blob/ca3db931b0f024f0412084751ac85dd4ef2da7e7/api/src/main/java/org/apache/iceberg/PartitionSpec.java#L198-L204
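+    # For reference, the expected segments below match what urllib.parse.quote_plus would produce for each name and
+    # value, e.g. quote_plus("my#str%bucket") == "my%23str%25bucket" and quote_plus("( )") == "%28+%29".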
+ assert spec.partition_to_path(record, schema) == "my%23str%25bucket=my%2Bstr/other+str%2Bbucket=%28+%29/my%21int%3Abucket=10"
+
+
def test_partition_type(table_schema_simple: Schema) -> None:
spec = PartitionSpec(
PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=19), name="str_truncate"),
diff --git a/tests/test_schema.py b/tests/test_schema.py
index d1fc19df77..daa46dee1f 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -1618,11 +1618,13 @@ def test_append_nested_lists() -> None:
def test_union_with_pa_schema(primitive_fields: NestedField) -> None:
base_schema = Schema(NestedField(field_id=1, name="foo", field_type=StringType(), required=True))
- pa_schema = pa.schema([
- pa.field("foo", pa.string(), nullable=False),
- pa.field("bar", pa.int32(), nullable=True),
- pa.field("baz", pa.bool_(), nullable=True),
- ])
+ pa_schema = pa.schema(
+ [
+ pa.field("foo", pa.string(), nullable=False),
+ pa.field("bar", pa.int32(), nullable=True),
+ pa.field("baz", pa.bool_(), nullable=True),
+ ]
+ )
new_schema = UpdateSchema(transaction=None, schema=base_schema).union_by_name(pa_schema)._apply() # type: ignore
@@ -1642,10 +1644,12 @@ def test_arrow_schema() -> None:
NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False),
)
- expected_schema = pa.schema([
- pa.field("foo", pa.large_string(), nullable=False),
- pa.field("bar", pa.int32(), nullable=True),
- pa.field("baz", pa.bool_(), nullable=True),
- ])
+ expected_schema = pa.schema(
+ [
+ pa.field("foo", pa.large_string(), nullable=False),
+ pa.field("bar", pa.int32(), nullable=True),
+ pa.field("baz", pa.bool_(), nullable=True),
+ ]
+ )
assert base_schema.as_arrow() == expected_schema
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
index 2fa459527e..72e7d6b6d3 100644
--- a/tests/test_transforms.py
+++ b/tests/test_transforms.py
@@ -897,7 +897,7 @@ def test_projection_truncate_string_set_same_result(bound_reference_str: BoundRe
def test_projection_truncate_string_set_in(bound_reference_str: BoundReference[str]) -> None:
assert TruncateTransform(3).project(
"name", BoundIn(term=bound_reference_str, literals={literal("hello"), literal("world")})
- ) == In(term="name", literals={literal("hel"), literal("wor")})
+ ) == In(term="name", literals={literal("hel"), literal("wor")}) # codespell:ignore hel
def test_projection_truncate_string_set_not_in(bound_reference_str: BoundReference[str]) -> None:
diff --git a/tests/utils/test_manifest.py b/tests/utils/test_manifest.py
index 154671c92e..3b1fc6f013 100644
--- a/tests/utils/test_manifest.py
+++ b/tests/utils/test_manifest.py
@@ -621,9 +621,9 @@ def test_write_manifest_list(
def test_file_format_case_insensitive(raw_file_format: str, expected_file_format: FileFormat) -> None:
if expected_file_format:
parsed_file_format = FileFormat(raw_file_format)
- assert parsed_file_format == expected_file_format, (
- f"File format {raw_file_format}: {parsed_file_format} != {expected_file_format}"
- )
+ assert (
+ parsed_file_format == expected_file_format
+ ), f"File format {raw_file_format}: {parsed_file_format} != {expected_file_format}"
else:
with pytest.raises(ValueError):
_ = FileFormat(raw_file_format)