diff --git a/.asf.yaml b/.asf.yaml index 2cc41779e3..b1f557e903 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -44,6 +44,7 @@ github: projects: true collaborators: # Note: the number of collaborators is limited to 10 - ajantha-bhat + - syun64 ghp_branch: gh-pages ghp_path: / diff --git a/.github/workflows/check-md-link.yml b/.github/workflows/check-md-link.yml new file mode 100644 index 0000000000..b91195e98a --- /dev/null +++ b/.github/workflows/check-md-link.yml @@ -0,0 +1,13 @@ +name: Check Markdown links + +on: + push: + paths: + - mkdocs/** + +jobs: + markdown-link-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - uses: gaurav-nelson/github-action-markdown-link-check@v1 diff --git a/.github/workflows/python-release.yml b/.github/workflows/python-release.yml index 219434664e..db88f7e824 100644 --- a/.github/workflows/python-release.yml +++ b/.github/workflows/python-release.yml @@ -34,7 +34,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ ubuntu-22.04, windows-2022, macos-11 ] + os: [ ubuntu-22.04, windows-2022, macos-11, macos-12, macos-13, macos-14 ] steps: - uses: actions/checkout@v4 @@ -43,7 +43,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.11' - name: Install poetry run: pip install poetry @@ -59,7 +59,7 @@ jobs: if: startsWith(matrix.os, 'ubuntu') - name: Build wheels - uses: pypa/cibuildwheel@v2.16.3 + uses: pypa/cibuildwheel@v2.16.5 with: output-dir: wheelhouse config-file: "pyproject.toml" @@ -67,7 +67,7 @@ jobs: # Ignore 32 bit architectures CIBW_ARCHS: "auto64" CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8,<3.12" - CIBW_TEST_REQUIRES: "pytest==7.4.2 moto==4.2.2" + CIBW_TEST_REQUIRES: "pytest==7.4.2 moto==5.0.1" CIBW_TEST_EXTRAS: "s3fs,glue" CIBW_TEST_COMMAND: "pytest {project}/tests/avro/test_decoder.py" # There is an upstream issue with installing on MacOSX @@ -80,7 +80,7 @@ jobs: if: startsWith(matrix.os, 'ubuntu') run: ls -lah dist/* && cp dist/* wheelhouse/ - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v3 with: name: "release-${{ github.event.inputs.version }}" path: ./wheelhouse/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index db6742a8cd..afdabf6fce 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -36,13 +36,13 @@ repos: - id: ruff-format args: [ --preview ] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.6.1 + rev: v1.8.0 hooks: - id: mypy args: [--install-types, --non-interactive, --config=pyproject.toml] - repo: https://github.com/hadialqattan/pycln - rev: v2.3.0 + rev: v2.4.0 hooks: - id: pycln args: [--config=pyproject.toml] diff --git a/Makefile b/Makefile index 4226614bd0..c3e816ebd5 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ install-poetry: pip install poetry==1.7.1 install-dependencies: - poetry install -E pyarrow -E hive -E s3fs -E glue -E adlfs -E duckdb -E ray -E sql-postgres -E gcsfs -E sql-sqlite + poetry install -E pyarrow -E hive -E s3fs -E glue -E adlfs -E duckdb -E ray -E sql-postgres -E gcsfs -E sql-sqlite -E daft install: | install-poetry install-dependencies diff --git a/NOTICE b/NOTICE index d7a3c57526..adcae2d516 100644 --- a/NOTICE +++ b/NOTICE @@ -1,6 +1,6 @@ Apache Iceberg -Copyright 2017-2022 The Apache Software Foundation +Copyright 2017-2024 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). 
diff --git a/mkdocs/docs/SUMMARY.md b/mkdocs/docs/SUMMARY.md index 77dbcbbf89..40ba0bffd7 100644 --- a/mkdocs/docs/SUMMARY.md +++ b/mkdocs/docs/SUMMARY.md @@ -17,11 +17,12 @@ -- [Home](index.md) +- [Getting started](index.md) - [Configuration](configuration.md) - [CLI](cli.md) - [API](api.md) - [Contributing](contributing.md) +- [Community](community.md) - Releases - [Verify a release](verify-release.md) - [How to release](how-to-release.md) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 650d391807..724a45c52f 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -418,6 +418,63 @@ with table.update_schema(allow_incompatible_changes=True) as update: update.delete_column("some_field") ``` +## Partition evolution + +PyIceberg supports partition evolution. See the [partition evolution](https://iceberg.apache.org/spec/#partition-evolution) section of the spec +for more details. + +The API to use when evolving partitions is the `update_spec` API on the table. + +```python +with table.update_spec() as update: + update.add_field("id", BucketTransform(16), "bucketed_id") + update.add_field("event_ts", DayTransform(), "day_ts") +``` + +Updating the partition spec can also be done as part of a transaction with other operations. + +```python +with table.transaction() as transaction: + with transaction.update_spec() as update_spec: + update_spec.add_field("id", BucketTransform(16), "bucketed_id") + update_spec.add_field("event_ts", DayTransform(), "day_ts") + # ... Update properties etc +``` + +### Add fields + +New partition fields can be added via the `add_field` API which takes in the field name to partition on, +the partition transform, and an optional partition name. If the partition name is not specified, +one will be created. + +```python +with table.update_spec() as update: + update.add_field("id", BucketTransform(16), "bucketed_id") + update.add_field("event_ts", DayTransform(), "day_ts") + # identity is a shortcut API for adding an IdentityTransform + update.identity("some_field") +``` + +### Remove fields + +Partition fields can also be removed via the `remove_field` API if it no longer makes sense to partition on those fields. + +```python +with table.update_spec() as update: + # Remove the partition field with the name some_partition_name + update.remove_field("some_partition_name") +``` + +### Rename fields + +Partition fields can also be renamed via the `rename_field` API. + +```python +with table.update_spec() as update: + # Rename the partition field with the name bucketed_id to sharded_id + update.rename_field("bucketed_id", "sharded_id") +``` + ## Table properties Set and remove properties through the `Transaction` API: @@ -636,3 +693,56 @@ print(ray_dataset.take(2)) }, ] ``` + +### Daft + +PyIceberg interfaces closely with Daft Dataframes (see also: [Daft integration with Iceberg](https://www.getdaft.io/projects/docs/en/latest/user_guide/integrations/iceberg.html)) which provides a full lazily optimized query engine interface on top of PyIceberg tables. + + + +!!! note "Requirements" + This requires [Daft to be installed](index.md). + + + +A table can be read easily into a Daft Dataframe: + +```python +df = table.to_daft() # equivalent to `daft.read_iceberg(table)` +df = df.where(df["trip_distance"] >= 10.0) +df = df.select("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime") +``` + +This returns a Daft Dataframe which is lazily materialized.
Printing `df` will display the schema: + +``` +╭──────────┬───────────────────────────────┬───────────────────────────────╮ +│ VendorID ┆ tpep_pickup_datetime ┆ tpep_dropoff_datetime │ +│ --- ┆ --- ┆ --- │ +│ Int64 ┆ Timestamp(Microseconds, None) ┆ Timestamp(Microseconds, None) │ +╰──────────┴───────────────────────────────┴───────────────────────────────╯ + +(No data to display: Dataframe not materialized) +``` + +We can execute the Dataframe to preview the first few rows of the query with `df.show()`. + +This is correctly optimized to take advantage of Iceberg features such as hidden partitioning and file-level statistics for efficient reads. + +```python +df.show(2) +``` + +``` +╭──────────┬───────────────────────────────┬───────────────────────────────╮ +│ VendorID ┆ tpep_pickup_datetime ┆ tpep_dropoff_datetime │ +│ --- ┆ --- ┆ --- │ +│ Int64 ┆ Timestamp(Microseconds, None) ┆ Timestamp(Microseconds, None) │ +╞══════════╪═══════════════════════════════╪═══════════════════════════════╡ +│ 2 ┆ 2008-12-31T23:23:50.000000 ┆ 2009-01-01T00:34:31.000000 │ +├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 2 ┆ 2008-12-31T23:05:03.000000 ┆ 2009-01-01T16:10:18.000000 │ +╰──────────┴───────────────────────────────┴───────────────────────────────╯ + +(Showing first 2 rows) +``` diff --git a/mkdocs/docs/cli.md b/mkdocs/docs/cli.md index 695011a6ef..28e44955d7 100644 --- a/mkdocs/docs/cli.md +++ b/mkdocs/docs/cli.md @@ -36,6 +36,7 @@ Options: --catalog TEXT --verbose BOOLEAN --output [text|json] +--ugi TEXT --uri TEXT --credential TEXT --help Show this message and exit. diff --git a/mkdocs/docs/community.md b/mkdocs/docs/community.md new file mode 100644 index 0000000000..4c542bff8e --- /dev/null +++ b/mkdocs/docs/community.md @@ -0,0 +1,64 @@ +--- +hide: + - navigation +--- + + + +# Join the community + +Apache Iceberg tracks issues in GitHub and prefers to receive contributions as pull requests. + +Community discussions happen primarily on the [dev mailing list](https://lists.apache.org/list.html?dev@iceberg.apache.org), on [Apache Iceberg Slack workspace](https://join.slack.com/t/apache-iceberg/shared_invite/zt-287g3akar-K9Oe_En5j1UL7Y_Ikpai3A) in the #python channel, and on specific [GitHub issues](https://github.com/apache/iceberg-python/issues). + +## Iceberg Community Events + +The PyIceberg community sync is on the last Tuesday of every month. To join, make sure to subscribe to the [iceberg-python-sync Google group](https://groups.google.com/g/iceberg-python-sync). + +## Community Guidelines + +### Apache Iceberg Community Guidelines + +The Apache Iceberg community is built on the principles described in the [Apache Way](https://www.apache.org/theapacheway/index.html) +and all who engage with the community are expected to be respectful, open, come with the best interests of the community in mind, +and abide by the Apache Foundation [Code of Conduct](https://www.apache.org/foundation/policies/conduct.html). + +### Participants with Corporate Interests + +A wide range of corporate entities have interests that overlap in both features and frameworks related to Iceberg and while we +encourage engagement and contributions, the community is not a venue for marketing, solicitation, or recruitment. + +Any vendor who wants to participate in the Apache Iceberg community Slack workspace should create a dedicated vendor channel +for their organization prefixed by `vendor-`. 
+ +This space can be used to discuss features and integration with Iceberg related to the vendor offering. This space should not +be used to promote competing vendor products/services or disparage other vendor offerings. Discussion should be focused on +questions asked by the community and not used to expand/introduce/redirect users to alternate offerings. + +### Marketing / Solicitation / Recruiting + +The Apache Iceberg community is a space for everyone to operate free of influence. The development lists, Slack workspace, +and GitHub should not be used to market products or services. Solicitation or overt promotion should not be performed in common +channels or through direct messages. + +Recruitment of community members should not be conducted through direct messages or community channels, but opportunities +related to contributing to or using Iceberg can be posted to the `#jobs` channel. + +For questions regarding any of the guidelines above, please contact a PMC member. diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 92934745fa..e1a6d3281b 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -46,7 +46,22 @@ The environment variable picked up by Iceberg starts with `PYICEBERG_` and then For example, `PYICEBERG_CATALOG__DEFAULT__S3__ACCESS_KEY_ID`, sets `s3.access-key-id` on the `default` catalog. -## FileIO +# Tables + +Iceberg tables support table properties to configure table behavior. + +## Write options + +| Key | Options | Default | Description | +| --------------------------------- | --------------------------------- | ------- | --------------------------------------------------------------------------------------------- | +| `write.parquet.compression-codec` | `{uncompressed,zstd,gzip,snappy}` | zstd | Sets the Parquet compression codec. | +| `write.parquet.compression-level` | Integer | null | Parquet compression level for the codec. If not set, the codec's default level is used. | +| `write.parquet.page-size-bytes` | Size in bytes | 1MB | Set a target threshold for the approximate encoded size of data pages within a column chunk | +| `write.parquet.page-row-limit` | Number of rows | 20000 | Set a target threshold for the maximum number of rows within a column chunk | +| `write.parquet.dict-size-bytes` | Size in bytes | 2MB | Set the dictionary page size limit per row group | +| `write.parquet.row-group-limit` | Number of rows | 122880 | The Parquet row group limit | + +# FileIO Iceberg works with the concept of a FileIO which is a pluggable module for reading, writing, and deleting files. By default, PyIceberg will try to initialize the FileIO that's suitable for the scheme (`s3://`, `gs://`, etc.) and will use the first one that's installed. @@ -133,6 +148,7 @@ catalog: | Key | Example | Description | | ---------------------- | ----------------------- | -------------------------------------------------------------------------------------------------- | | uri | https://rest-catalog/ws | URI identifying the REST Server | +| ugi | t-1234:secret | Hadoop UGI for Hive client. 
| | credential | t-1234:secret | Credential to use for OAuth2 credential flow when initializing the catalog | | token | FEW23.DFSDF.FSDF | Bearer token value to use for `Authorization` header | | rest.sigv4-enabled | true | Sign requests to the REST Server using AWS SigV4 protocol | @@ -140,6 +156,19 @@ catalog: | rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request | | rest.authorization-url | https://auth-service/cc | Authentication URL to use for client credentials authentication (default: uri + 'v1/oauth/tokens') | +### Headers in RESTCatalog + +To configure custom headers in RESTCatalog, include them in the catalog properties with the prefix `header.`. This +ensures that all HTTP requests to the REST service include the specified headers. + +```yaml +catalog: + default: + uri: http://rest-catalog/ws/ + credential: t-1234:secret + header.content-type: application/vnd.api+json +``` + ## SQL Catalog The SQL catalog requires a database for its backend. PyIceberg supports PostgreSQL (through psycopg2) and SQLite. The database connection has to be configured using the `uri` property. See SQLAlchemy's [documentation for URL format](https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls): diff --git a/mkdocs/docs/contributing.md b/mkdocs/docs/contributing.md index 8ec6dcb2d2..7411382d2c 100644 --- a/mkdocs/docs/contributing.md +++ b/mkdocs/docs/contributing.md @@ -58,6 +58,22 @@ For IDEA ≤2021 you need to install the [Poetry integration as a plugin](https: Now you're set using Poetry, and all the tests will run in Poetry, and you'll have syntax highlighting in the pyproject.toml to indicate stale dependencies. +## Installation from source + +Clone the repository for local development: + +```sh +git clone https://github.com/apache/iceberg-python.git +cd iceberg-python +pip3 install -e ".[s3fs,hive]" +``` + +Install it directly from GitHub (not recommended, but sometimes handy): + +``` +pip install "git+https://github.com/apache/iceberg-python.git#egg=pyiceberg[s3fs]" +``` + ## Linting `pre-commit` is used for autoformatting and linting: diff --git a/mkdocs/docs/how-to-release.md b/mkdocs/docs/how-to-release.md index 9cecad2d5d..e41250a1f9 100644 --- a/mkdocs/docs/how-to-release.md +++ b/mkdocs/docs/how-to-release.md @@ -29,7 +29,17 @@ Make sure that the version is correct in `pyproject.toml` and `pyiceberg/__init_ ### Setting the tag -First set the tag on the commit: +Make sure that you're on the right branch, and that it is up to date: + +For a Major/Minor release, make sure that you're on `main`; for patch versions, use the branch corresponding to the version that you want to patch, e.g. `pyiceberg-0.6.x`. + +```bash +git checkout <branch> +git fetch --all +git reset --hard apache/<branch> +``` + +Set the tag on the last commit: ```bash export RC=rc1 @@ -136,11 +146,37 @@ cat release-announcement-email.txt ## Vote has passed -Once the vote has been passed, the latest version can be pushed to PyPi. Check out the Apache SVN and make sure to publish the right version with `twine`: +Once the vote has passed, you can close the vote thread by concluding it: + +``` +Thanks everyone for voting! The 72 hours have passed, and a minimum of 3 binding votes have been cast: + ++1 Foo Bar (non-binding) +... ++1 Fokko Driesprong (binding) + +The release candidate has been accepted as PyIceberg <VERSION>. Thanks everyone, when all artifacts are published the announcement will be sent out. 
+ +Kind regards, +``` + +### Copy the artifacts to the release dist + +``` +svn checkout https://dist.apache.org/repos/dist/dev/iceberg /tmp/iceberg-dist-dev +svn checkout https://dist.apache.org/repos/dist/release/iceberg/ /tmp/iceberg-dist-release + +mkdir -p /tmp/iceberg-dist-release/pyiceberg-<VERSION> +cp -r /tmp/iceberg-dist-dev/pyiceberg-<VERSION>rcN/* /tmp/iceberg-dist-release/pyiceberg-<VERSION> + +svn add /tmp/iceberg-dist-release/ +svn ci -m "PyIceberg <VERSION>" /tmp/iceberg-dist-release/ +``` + +The latest version can be pushed to PyPI. Check out the Apache SVN and make sure to publish the right version with `twine`: ```bash -svn checkout https://dist.apache.org/repos/dist/dev/iceberg /tmp/ -twine upload -s /tmp/iceberg/pyiceberg-0.1.0rc1 +twine upload -s /tmp/iceberg-dist-release/pyiceberg-<VERSION>/* ``` Send out an announcement on the dev mail list: diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 628f4f7dd4..a8c2c6bd3c 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -20,11 +20,11 @@ hide: - limitations under the License. --> -# PyIceberg +# Getting started with PyIceberg PyIceberg is a Python implementation for accessing Iceberg tables, without the need of a JVM. -## Install +## Installation Before installing PyIceberg, make sure that you're on an up-to-date version of `pip`: @@ -38,36 +38,156 @@ You can install the latest release version from pypi: pip install "pyiceberg[s3fs,hive]" ``` -Install it directly for GitHub (not recommended), but sometimes handy: +You can mix and match optional dependencies depending on your needs: + +| Key | Description | +| ------------ | -------------------------------------------------------------------- | +| hive | Support for the Hive metastore | +| glue | Support for AWS Glue | +| dynamodb | Support for AWS DynamoDB | +| sql-postgres | Support for SQL Catalog backed by PostgreSQL | +| sql-sqlite | Support for SQL Catalog backed by SQLite | +| pyarrow | PyArrow as a FileIO implementation to interact with the object store | +| pandas | Installs both PyArrow and Pandas | +| duckdb | Installs both PyArrow and DuckDB | +| ray | Installs PyArrow, Pandas, and Ray | +| daft | Installs Daft | +| s3fs | S3FS as a FileIO implementation to interact with the object store | +| adlfs | ADLFS as a FileIO implementation to interact with the object store | +| snappy | Support for snappy Avro compression | +| gcsfs | GCSFS as a FileIO implementation to interact with the object store | + +You either need to install `s3fs`, `adlfs`, `gcsfs`, or `pyarrow` to be able to fetch files from an object store. + +## Connecting to a catalog + +Iceberg leverages the [catalog to have one centralized place to organize the tables](https://iceberg.apache.org/catalog/). This can be a traditional Hive catalog to store your Iceberg tables next to the rest, a vendor solution like the AWS Glue catalog, or an implementation of Iceberg's own [REST protocol](https://github.com/apache/iceberg/tree/main/open-api). Check out the [configuration](configuration.md) page to find all the configuration details. + +For the sake of demonstration, we'll configure the catalog to use the `SqlCatalog` implementation, which will store information in a local `sqlite` database. We'll also configure the catalog to store data files in the local filesystem instead of an object store. This should not be used in production due to the limited scalability. 
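The same local catalog can also be loaded by name through PyIceberg's configuration properties instead of constructing `SqlCatalog` directly as the walkthrough below does. The snippet is a minimal sketch, assuming the `sql` catalog type and the same `uri` and `warehouse` properties described on the [configuration](configuration.md) page:

```python
from pyiceberg.catalog import load_catalog

# Sketch: load the same catalog via properties rather than the SqlCatalog
# constructor used below; assumes the `sql` catalog type is available and
# that the /tmp/warehouse directory already exists.
warehouse_path = "/tmp/warehouse"
catalog = load_catalog(
    "default",
    **{
        "type": "sql",
        "uri": f"sqlite:///{warehouse_path}/pyiceberg_catalog.db",
        "warehouse": f"file://{warehouse_path}",
    },
)
```

Either form yields the same catalog object; the steps below construct it explicitly so the properties are visible.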
+Create a temporary location for Iceberg: + +```shell +mkdir /tmp/warehouse ``` -pip install "git+https://github.com/apache/iceberg-python.git#egg=pyiceberg[s3fs]" + +Open a Python 3 REPL to set up the catalog: + +```python +from pyiceberg.catalog.sql import SqlCatalog + +warehouse_path = "/tmp/warehouse" +catalog = SqlCatalog( + "default", + **{ + "uri": f"sqlite:///{warehouse_path}/pyiceberg_catalog.db", + "warehouse": f"file://{warehouse_path}", + }, +) ``` -Or clone the repository for local development: +## Write a PyArrow dataframe -```sh -git clone https://github.com/apache/iceberg-python.git -cd iceberg-python -pip3 install -e ".[s3fs,hive]" +Let's take the Taxi dataset, and write this to an Iceberg table. + +First download one month of data: + +```shell +curl https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet -o /tmp/yellow_tripdata_2023-01.parquet ``` -You can mix and match optional dependencies depending on your needs: +Load it into your PyArrow dataframe: + +```python +import pyarrow.parquet as pq + +df = pq.read_table("/tmp/yellow_tripdata_2023-01.parquet") +``` + +Create a new Iceberg table: + +```python +catalog.create_namespace("default") + +table = catalog.create_table( + "default.taxi_dataset", + schema=df.schema, +) +``` + +Append the dataframe to the table: + +```python +table.append(df) +len(table.scan().to_arrow()) +``` + +3066766 rows have been written to the table. + +Now generate a tip-per-mile feature to train the model on: + +```python +import pyarrow.compute as pc + +df = df.append_column("tip_per_mile", pc.divide(df["tip_amount"], df["trip_distance"])) +``` + +Evolve the schema of the table with the new column: + +```python +with table.update_schema() as update_schema: + update_schema.union_by_name(df.schema) +``` + +And now we can write the new dataframe to the Iceberg table: + +```python +table.overwrite(df) +print(table.scan().to_arrow()) +``` + +And the new column is there: + +``` +taxi_dataset( + 1: VendorID: optional long, + 2: tpep_pickup_datetime: optional timestamp, + 3: tpep_dropoff_datetime: optional timestamp, + 4: passenger_count: optional double, + 5: trip_distance: optional double, + 6: RatecodeID: optional double, + 7: store_and_fwd_flag: optional string, + 8: PULocationID: optional long, + 9: DOLocationID: optional long, + 10: payment_type: optional long, + 11: fare_amount: optional double, + 12: extra: optional double, + 13: mta_tax: optional double, + 14: tip_amount: optional double, + 15: tolls_amount: optional double, + 16: improvement_surcharge: optional double, + 17: total_amount: optional double, + 18: congestion_surcharge: optional double, + 19: airport_fee: optional double, + 20: tip_per_mile: optional double +), +``` + +And we can see that 2371784 rows have a tip-per-mile: + +```python +df = table.scan(row_filter="tip_per_mile > 0").to_arrow() +len(df) +``` + +### Explore Iceberg data and metadata files + +Since the catalog was configured to use the local filesystem, we can explore how Iceberg saved data and metadata files from the above operations. 
+ +```shell +find /tmp/warehouse/ +``` + +## More details -| Key | Description: | -| -------- | -------------------------------------------------------------------- | -| hive | Support for the Hive metastore | -| glue | Support for AWS Glue | -| dynamodb | Support for AWS DynamoDB | -| pyarrow | PyArrow as a FileIO implementation to interact with the object store | -| pandas | Installs both PyArrow and Pandas | -| duckdb | Installs both PyArrow and DuckDB | -| ray | Installs PyArrow, Pandas, and Ray | -| s3fs | S3FS as a FileIO implementation to interact with the object store | -| adlfs | ADLFS as a FileIO implementation to interact with the object store | -| snappy | Support for snappy Avro compression | -| gcs | GCS as the FileIO implementation to interact with the object store | - -You either need to install `s3fs`, `adlfs`, `gcs`, or `pyarrow` for fetching files. - -There is both a [CLI](cli.md) and [Python API](api.md) available. +For the details, please check the [CLI](cli.md) or [Python API](api.md) page. diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index fd5c182572..e2099b7c58 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -16,13 +16,13 @@ # under the License. mkdocs==1.5.3 -griffe==0.39.1 +griffe==0.40.1 jinja2==3.1.3 mkdocstrings==0.24.0 mkdocstrings-python==1.8.0 mkdocs-literate-nav==0.6.1 mkdocs-autorefs==0.5.0 mkdocs-gen-files==0.5.0 -mkdocs-material==9.5.5 +mkdocs-material==9.5.10 mkdocs-material-extensions==1.3.1 mkdocs-section-index==0.3.8 diff --git a/poetry.lock b/poetry.lock index 41f13a7c23..6cd3829a78 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,13 +2,13 @@ [[package]] name = "adlfs" -version = "2023.12.0" +version = "2024.2.0" description = "Access Azure Datalake Gen1 with fsspec and dask" optional = true python-versions = ">=3.8" files = [ - {file = "adlfs-2023.12.0-py3-none-any.whl", hash = "sha256:da6e391afd002c7bb1b75dcc286b78fdf5dbf29aca0f984462033df6311b4e7e"}, - {file = "adlfs-2023.12.0.tar.gz", hash = "sha256:a590694ed9f5a45741e82bff8bcf88c30a790da949310817330b5e7992b8a9e9"}, + {file = "adlfs-2024.2.0-py3-none-any.whl", hash = "sha256:6514fb147032eea843e4b773c557df90cf6a929185edc7c97f7a12b91bfc1fa1"}, + {file = "adlfs-2024.2.0.tar.gz", hash = "sha256:860f5ddbd7f3c2553d84a101717dc5736e823305e0d51e8c0058bc85a7fa304d"}, ] [package.dependencies] @@ -25,108 +25,108 @@ tests = ["arrow", "dask[dataframe]", "docker", "pytest", "pytest-mock"] [[package]] name = "aiobotocore" -version = "2.11.1" +version = "2.11.2" description = "Async client for aws services using botocore and aiohttp" optional = true python-versions = ">=3.8" files = [ - {file = "aiobotocore-2.11.1-py3-none-any.whl", hash = "sha256:904a7ad7cc8671d662cfd596906dafe839118ea2a66332c37908e3dcfdee1e45"}, - {file = "aiobotocore-2.11.1.tar.gz", hash = "sha256:0b095af50da2d6f94e93ca959e2a4876f0f0d84d534b61b21d8e050832d04ab6"}, + {file = "aiobotocore-2.11.2-py3-none-any.whl", hash = "sha256:487fede588040bfa3a43df945275c28c1c73ca75bf705295adb9fbadd2e89be7"}, + {file = "aiobotocore-2.11.2.tar.gz", hash = "sha256:6dd7352248e3523019c5a54a395d2b1c31080697fc80a9ad2672de4eec8c7abd"}, ] [package.dependencies] aiohttp = ">=3.7.4.post0,<4.0.0" aioitertools = ">=0.5.1,<1.0.0" -botocore = ">=1.33.2,<1.34.28" +botocore = ">=1.33.2,<1.34.35" wrapt = ">=1.10.10,<2.0.0" [package.extras] -awscli = ["awscli (>=1.31.2,<1.32.28)"] -boto3 = ["boto3 (>=1.33.2,<1.34.28)"] +awscli = ["awscli (>=1.31.2,<1.32.35)"] +boto3 = ["boto3 (>=1.33.2,<1.34.35)"] [[package]] name = "aiohttp" -version = 
"3.9.1" +version = "3.9.2" description = "Async http client/server framework (asyncio)" optional = true python-versions = ">=3.8" files = [ - {file = "aiohttp-3.9.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e1f80197f8b0b846a8d5cf7b7ec6084493950d0882cc5537fb7b96a69e3c8590"}, - {file = "aiohttp-3.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c72444d17777865734aa1a4d167794c34b63e5883abb90356a0364a28904e6c0"}, - {file = "aiohttp-3.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b05d5cbe9dafcdc733262c3a99ccf63d2f7ce02543620d2bd8db4d4f7a22f83"}, - {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c4fa235d534b3547184831c624c0b7c1e262cd1de847d95085ec94c16fddcd5"}, - {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:289ba9ae8e88d0ba16062ecf02dd730b34186ea3b1e7489046fc338bdc3361c4"}, - {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bff7e2811814fa2271be95ab6e84c9436d027a0e59665de60edf44e529a42c1f"}, - {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81b77f868814346662c96ab36b875d7814ebf82340d3284a31681085c051320f"}, - {file = "aiohttp-3.9.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b9c7426923bb7bd66d409da46c41e3fb40f5caf679da624439b9eba92043fa6"}, - {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:8d44e7bf06b0c0a70a20f9100af9fcfd7f6d9d3913e37754c12d424179b4e48f"}, - {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22698f01ff5653fe66d16ffb7658f582a0ac084d7da1323e39fd9eab326a1f26"}, - {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ca7ca5abfbfe8d39e653870fbe8d7710be7a857f8a8386fc9de1aae2e02ce7e4"}, - {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:8d7f98fde213f74561be1d6d3fa353656197f75d4edfbb3d94c9eb9b0fc47f5d"}, - {file = "aiohttp-3.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5216b6082c624b55cfe79af5d538e499cd5f5b976820eac31951fb4325974501"}, - {file = "aiohttp-3.9.1-cp310-cp310-win32.whl", hash = "sha256:0e7ba7ff228c0d9a2cd66194e90f2bca6e0abca810b786901a569c0de082f489"}, - {file = "aiohttp-3.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:c7e939f1ae428a86e4abbb9a7c4732bf4706048818dfd979e5e2839ce0159f23"}, - {file = "aiohttp-3.9.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:df9cf74b9bc03d586fc53ba470828d7b77ce51b0582d1d0b5b2fb673c0baa32d"}, - {file = "aiohttp-3.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecca113f19d5e74048c001934045a2b9368d77b0b17691d905af18bd1c21275e"}, - {file = "aiohttp-3.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8cef8710fb849d97c533f259103f09bac167a008d7131d7b2b0e3a33269185c0"}, - {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bea94403a21eb94c93386d559bce297381609153e418a3ffc7d6bf772f59cc35"}, - {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91c742ca59045dce7ba76cab6e223e41d2c70d79e82c284a96411f8645e2afff"}, - {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c93b7c2e52061f0925c3382d5cb8980e40f91c989563d3d32ca280069fd6a87"}, - {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ee2527134f95e106cc1653e9ac78846f3a2ec1004cf20ef4e02038035a74544d"}, - {file = "aiohttp-3.9.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11ff168d752cb41e8492817e10fb4f85828f6a0142b9726a30c27c35a1835f01"}, - {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b8c3a67eb87394386847d188996920f33b01b32155f0a94f36ca0e0c635bf3e3"}, - {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c7b5d5d64e2a14e35a9240b33b89389e0035e6de8dbb7ffa50d10d8b65c57449"}, - {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:69985d50a2b6f709412d944ffb2e97d0be154ea90600b7a921f95a87d6f108a2"}, - {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:c9110c06eaaac7e1f5562caf481f18ccf8f6fdf4c3323feab28a93d34cc646bd"}, - {file = "aiohttp-3.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d737e69d193dac7296365a6dcb73bbbf53bb760ab25a3727716bbd42022e8d7a"}, - {file = "aiohttp-3.9.1-cp311-cp311-win32.whl", hash = "sha256:4ee8caa925aebc1e64e98432d78ea8de67b2272252b0a931d2ac3bd876ad5544"}, - {file = "aiohttp-3.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:a34086c5cc285be878622e0a6ab897a986a6e8bf5b67ecb377015f06ed316587"}, - {file = "aiohttp-3.9.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f800164276eec54e0af5c99feb9494c295118fc10a11b997bbb1348ba1a52065"}, - {file = "aiohttp-3.9.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:500f1c59906cd142d452074f3811614be04819a38ae2b3239a48b82649c08821"}, - {file = "aiohttp-3.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0b0a6a36ed7e164c6df1e18ee47afbd1990ce47cb428739d6c99aaabfaf1b3af"}, - {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69da0f3ed3496808e8cbc5123a866c41c12c15baaaead96d256477edf168eb57"}, - {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:176df045597e674fa950bf5ae536be85699e04cea68fa3a616cf75e413737eb5"}, - {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b796b44111f0cab6bbf66214186e44734b5baab949cb5fb56154142a92989aeb"}, - {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f27fdaadce22f2ef950fc10dcdf8048407c3b42b73779e48a4e76b3c35bca26c"}, - {file = "aiohttp-3.9.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcb6532b9814ea7c5a6a3299747c49de30e84472fa72821b07f5a9818bce0f66"}, - {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:54631fb69a6e44b2ba522f7c22a6fb2667a02fd97d636048478db2fd8c4e98fe"}, - {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4b4c452d0190c5a820d3f5c0f3cd8a28ace48c54053e24da9d6041bf81113183"}, - {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:cae4c0c2ca800c793cae07ef3d40794625471040a87e1ba392039639ad61ab5b"}, - {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:565760d6812b8d78d416c3c7cfdf5362fbe0d0d25b82fed75d0d29e18d7fc30f"}, - {file = "aiohttp-3.9.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:54311eb54f3a0c45efb9ed0d0a8f43d1bc6060d773f6973efd90037a51cd0a3f"}, - {file = "aiohttp-3.9.1-cp312-cp312-win32.whl", hash = "sha256:85c3e3c9cb1d480e0b9a64c658cd66b3cfb8e721636ab8b0e746e2d79a7a9eed"}, - {file = "aiohttp-3.9.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:11cb254e397a82efb1805d12561e80124928e04e9c4483587ce7390b3866d213"}, - {file = "aiohttp-3.9.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8a22a34bc594d9d24621091d1b91511001a7eea91d6652ea495ce06e27381f70"}, - {file = "aiohttp-3.9.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:598db66eaf2e04aa0c8900a63b0101fdc5e6b8a7ddd805c56d86efb54eb66672"}, - {file = "aiohttp-3.9.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2c9376e2b09895c8ca8b95362283365eb5c03bdc8428ade80a864160605715f1"}, - {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41473de252e1797c2d2293804e389a6d6986ef37cbb4a25208de537ae32141dd"}, - {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c5857612c9813796960c00767645cb5da815af16dafb32d70c72a8390bbf690"}, - {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffcd828e37dc219a72c9012ec44ad2e7e3066bec6ff3aaa19e7d435dbf4032ca"}, - {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:219a16763dc0294842188ac8a12262b5671817042b35d45e44fd0a697d8c8361"}, - {file = "aiohttp-3.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f694dc8a6a3112059258a725a4ebe9acac5fe62f11c77ac4dcf896edfa78ca28"}, - {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bcc0ea8d5b74a41b621ad4a13d96c36079c81628ccc0b30cfb1603e3dfa3a014"}, - {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:90ec72d231169b4b8d6085be13023ece8fa9b1bb495e4398d847e25218e0f431"}, - {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:cf2a0ac0615842b849f40c4d7f304986a242f1e68286dbf3bd7a835e4f83acfd"}, - {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:0e49b08eafa4f5707ecfb321ab9592717a319e37938e301d462f79b4e860c32a"}, - {file = "aiohttp-3.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2c59e0076ea31c08553e868cec02d22191c086f00b44610f8ab7363a11a5d9d8"}, - {file = "aiohttp-3.9.1-cp38-cp38-win32.whl", hash = "sha256:4831df72b053b1eed31eb00a2e1aff6896fb4485301d4ccb208cac264b648db4"}, - {file = "aiohttp-3.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:3135713c5562731ee18f58d3ad1bf41e1d8883eb68b363f2ffde5b2ea4b84cc7"}, - {file = "aiohttp-3.9.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cfeadf42840c1e870dc2042a232a8748e75a36b52d78968cda6736de55582766"}, - {file = "aiohttp-3.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:70907533db712f7aa791effb38efa96f044ce3d4e850e2d7691abd759f4f0ae0"}, - {file = "aiohttp-3.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cdefe289681507187e375a5064c7599f52c40343a8701761c802c1853a504558"}, - {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7481f581251bb5558ba9f635db70908819caa221fc79ee52a7f58392778c636"}, - {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:49f0c1b3c2842556e5de35f122fc0f0b721334ceb6e78c3719693364d4af8499"}, - {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d406b01a9f5a7e232d1b0d161b40c05275ffbcbd772dc18c1d5a570961a1ca4"}, - {file = "aiohttp-3.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d8e4450e7fe24d86e86b23cc209e0023177b6d59502e33807b732d2deb6975f"}, - {file = 
"aiohttp-3.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c0266cd6f005e99f3f51e583012de2778e65af6b73860038b968a0a8888487a"}, - {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab221850108a4a063c5b8a70f00dd7a1975e5a1713f87f4ab26a46e5feac5a0e"}, - {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c88a15f272a0ad3d7773cf3a37cc7b7d077cbfc8e331675cf1346e849d97a4e5"}, - {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:237533179d9747080bcaad4d02083ce295c0d2eab3e9e8ce103411a4312991a0"}, - {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:02ab6006ec3c3463b528374c4cdce86434e7b89ad355e7bf29e2f16b46c7dd6f"}, - {file = "aiohttp-3.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04fa38875e53eb7e354ece1607b1d2fdee2d175ea4e4d745f6ec9f751fe20c7c"}, - {file = "aiohttp-3.9.1-cp39-cp39-win32.whl", hash = "sha256:82eefaf1a996060602f3cc1112d93ba8b201dbf5d8fd9611227de2003dddb3b7"}, - {file = "aiohttp-3.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:9b05d33ff8e6b269e30a7957bd3244ffbce2a7a35a81b81c382629b80af1a8bf"}, - {file = "aiohttp-3.9.1.tar.gz", hash = "sha256:8fc49a87ac269d4529da45871e2ffb6874e87779c3d0e2ccd813c0899221239d"}, + {file = "aiohttp-3.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:772fbe371788e61c58d6d3d904268e48a594ba866804d08c995ad71b144f94cb"}, + {file = "aiohttp-3.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:edd4f1af2253f227ae311ab3d403d0c506c9b4410c7fc8d9573dec6d9740369f"}, + {file = "aiohttp-3.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cfee9287778399fdef6f8a11c9e425e1cb13cc9920fd3a3df8f122500978292b"}, + {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cc158466f6a980a6095ee55174d1de5730ad7dec251be655d9a6a9dd7ea1ff9"}, + {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54ec82f45d57c9a65a1ead3953b51c704f9587440e6682f689da97f3e8defa35"}, + {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abeb813a18eb387f0d835ef51f88568540ad0325807a77a6e501fed4610f864e"}, + {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc91d07280d7d169f3a0f9179d8babd0ee05c79d4d891447629ff0d7d8089ec2"}, + {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b65e861f4bebfb660f7f0f40fa3eb9f2ab9af10647d05dac824390e7af8f75b7"}, + {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:04fd8ffd2be73d42bcf55fd78cde7958eeee6d4d8f73c3846b7cba491ecdb570"}, + {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3d8d962b439a859b3ded9a1e111a4615357b01620a546bc601f25b0211f2da81"}, + {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:8ceb658afd12b27552597cf9a65d9807d58aef45adbb58616cdd5ad4c258c39e"}, + {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0e4ee4df741670560b1bc393672035418bf9063718fee05e1796bf867e995fad"}, + {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2dec87a556f300d3211decf018bfd263424f0690fcca00de94a837949fbcea02"}, + {file = "aiohttp-3.9.2-cp310-cp310-win32.whl", hash = "sha256:3e1a800f988ce7c4917f34096f81585a73dbf65b5c39618b37926b1238cf9bc4"}, + {file = "aiohttp-3.9.2-cp310-cp310-win_amd64.whl", hash = 
"sha256:ea510718a41b95c236c992b89fdfc3d04cc7ca60281f93aaada497c2b4e05c46"}, + {file = "aiohttp-3.9.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6aaa6f99256dd1b5756a50891a20f0d252bd7bdb0854c5d440edab4495c9f973"}, + {file = "aiohttp-3.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a27d8c70ad87bcfce2e97488652075a9bdd5b70093f50b10ae051dfe5e6baf37"}, + {file = "aiohttp-3.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:54287bcb74d21715ac8382e9de146d9442b5f133d9babb7e5d9e453faadd005e"}, + {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb3d05569aa83011fcb346b5266e00b04180105fcacc63743fc2e4a1862a891"}, + {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c8534e7d69bb8e8d134fe2be9890d1b863518582f30c9874ed7ed12e48abe3c4"}, + {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bd9d5b989d57b41e4ff56ab250c5ddf259f32db17159cce630fd543376bd96b"}, + {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa6904088e6642609981f919ba775838ebf7df7fe64998b1a954fb411ffb4663"}, + {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bda42eb410be91b349fb4ee3a23a30ee301c391e503996a638d05659d76ea4c2"}, + {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:193cc1ccd69d819562cc7f345c815a6fc51d223b2ef22f23c1a0f67a88de9a72"}, + {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b9f1cb839b621f84a5b006848e336cf1496688059d2408e617af33e3470ba204"}, + {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:d22a0931848b8c7a023c695fa2057c6aaac19085f257d48baa24455e67df97ec"}, + {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4112d8ba61fbd0abd5d43a9cb312214565b446d926e282a6d7da3f5a5aa71d36"}, + {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c4ad4241b52bb2eb7a4d2bde060d31c2b255b8c6597dd8deac2f039168d14fd7"}, + {file = "aiohttp-3.9.2-cp311-cp311-win32.whl", hash = "sha256:ee2661a3f5b529f4fc8a8ffee9f736ae054adfb353a0d2f78218be90617194b3"}, + {file = "aiohttp-3.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:4deae2c165a5db1ed97df2868ef31ca3cc999988812e82386d22937d9d6fed52"}, + {file = "aiohttp-3.9.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:6f4cdba12539215aaecf3c310ce9d067b0081a0795dd8a8805fdb67a65c0572a"}, + {file = "aiohttp-3.9.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:84e843b33d5460a5c501c05539809ff3aee07436296ff9fbc4d327e32aa3a326"}, + {file = "aiohttp-3.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8008d0f451d66140a5aa1c17e3eedc9d56e14207568cd42072c9d6b92bf19b52"}, + {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61c47ab8ef629793c086378b1df93d18438612d3ed60dca76c3422f4fbafa792"}, + {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc71f748e12284312f140eaa6599a520389273174b42c345d13c7e07792f4f57"}, + {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a1c3a4d0ab2f75f22ec80bca62385db2e8810ee12efa8c9e92efea45c1849133"}, + {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a87aa0b13bbee025faa59fa58861303c2b064b9855d4c0e45ec70182bbeba1b"}, + {file = 
"aiohttp-3.9.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2cc0d04688b9f4a7854c56c18aa7af9e5b0a87a28f934e2e596ba7e14783192"}, + {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1956e3ac376b1711c1533266dec4efd485f821d84c13ce1217d53e42c9e65f08"}, + {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:114da29f39eccd71b93a0fcacff178749a5c3559009b4a4498c2c173a6d74dff"}, + {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:3f17999ae3927d8a9a823a1283b201344a0627272f92d4f3e3a4efe276972fe8"}, + {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:f31df6a32217a34ae2f813b152a6f348154f948c83213b690e59d9e84020925c"}, + {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7a75307ffe31329928a8d47eae0692192327c599113d41b278d4c12b54e1bd11"}, + {file = "aiohttp-3.9.2-cp312-cp312-win32.whl", hash = "sha256:972b63d589ff8f305463593050a31b5ce91638918da38139b9d8deaba9e0fed7"}, + {file = "aiohttp-3.9.2-cp312-cp312-win_amd64.whl", hash = "sha256:200dc0246f0cb5405c80d18ac905c8350179c063ea1587580e3335bfc243ba6a"}, + {file = "aiohttp-3.9.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:158564d0d1020e0d3fe919a81d97aadad35171e13e7b425b244ad4337fc6793a"}, + {file = "aiohttp-3.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:da1346cd0ccb395f0ed16b113ebb626fa43b7b07fd7344fce33e7a4f04a8897a"}, + {file = "aiohttp-3.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:eaa9256de26ea0334ffa25f1913ae15a51e35c529a1ed9af8e6286dd44312554"}, + {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1543e7fb00214fb4ccead42e6a7d86f3bb7c34751ec7c605cca7388e525fd0b4"}, + {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:186e94570433a004e05f31f632726ae0f2c9dee4762a9ce915769ce9c0a23d89"}, + {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d52d20832ac1560f4510d68e7ba8befbc801a2b77df12bd0cd2bcf3b049e52a4"}, + {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c45e4e815ac6af3b72ca2bde9b608d2571737bb1e2d42299fc1ffdf60f6f9a1"}, + {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa906b9bdfd4a7972dd0628dbbd6413d2062df5b431194486a78f0d2ae87bd55"}, + {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:68bbee9e17d66f17bb0010aa15a22c6eb28583edcc8b3212e2b8e3f77f3ebe2a"}, + {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4c189b64bd6d9a403a1a3f86a3ab3acbc3dc41a68f73a268a4f683f89a4dec1f"}, + {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:8a7876f794523123bca6d44bfecd89c9fec9ec897a25f3dd202ee7fc5c6525b7"}, + {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:d23fba734e3dd7b1d679b9473129cd52e4ec0e65a4512b488981a56420e708db"}, + {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b141753be581fab842a25cb319f79536d19c2a51995d7d8b29ee290169868eab"}, + {file = "aiohttp-3.9.2-cp38-cp38-win32.whl", hash = "sha256:103daf41ff3b53ba6fa09ad410793e2e76c9d0269151812e5aba4b9dd674a7e8"}, + {file = "aiohttp-3.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:328918a6c2835861ff7afa8c6d2c70c35fdaf996205d5932351bdd952f33fa2f"}, + {file = "aiohttp-3.9.2-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:5264d7327c9464786f74e4ec9342afbbb6ee70dfbb2ec9e3dfce7a54c8043aa3"}, + {file = "aiohttp-3.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:07205ae0015e05c78b3288c1517afa000823a678a41594b3fdc870878d645305"}, + {file = "aiohttp-3.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae0a1e638cffc3ec4d4784b8b4fd1cf28968febc4bd2718ffa25b99b96a741bd"}, + {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d43302a30ba1166325974858e6ef31727a23bdd12db40e725bec0f759abce505"}, + {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16a967685907003765855999af11a79b24e70b34dc710f77a38d21cd9fc4f5fe"}, + {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6fa3ee92cd441d5c2d07ca88d7a9cef50f7ec975f0117cd0c62018022a184308"}, + {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b500c5ad9c07639d48615a770f49618130e61be36608fc9bc2d9bae31732b8f"}, + {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c07327b368745b1ce2393ae9e1aafed7073d9199e1dcba14e035cc646c7941bf"}, + {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cc7d6502c23a0ec109687bf31909b3fb7b196faf198f8cff68c81b49eb316ea9"}, + {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:07be2be7071723c3509ab5c08108d3a74f2181d4964e869f2504aaab68f8d3e8"}, + {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:122468f6fee5fcbe67cb07014a08c195b3d4c41ff71e7b5160a7bcc41d585a5f"}, + {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:00a9abcea793c81e7f8778ca195a1714a64f6d7436c4c0bb168ad2a212627000"}, + {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7a9825fdd64ecac5c670234d80bb52bdcaa4139d1f839165f548208b3779c6c6"}, + {file = "aiohttp-3.9.2-cp39-cp39-win32.whl", hash = "sha256:5422cd9a4a00f24c7244e1b15aa9b87935c85fb6a00c8ac9b2527b38627a9211"}, + {file = "aiohttp-3.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:7d579dcd5d82a86a46f725458418458fa43686f6a7b252f2966d359033ffc8ab"}, + {file = "aiohttp-3.9.2.tar.gz", hash = "sha256:b0ad0a5e86ce73f5368a164c10ada10504bf91869c05ab75d982c6048217fbf7"}, ] [package.dependencies] @@ -332,17 +332,17 @@ files = [ [[package]] name = "boto3" -version = "1.34.27" +version = "1.34.34" description = "The AWS SDK for Python" optional = false python-versions = ">= 3.8" files = [ - {file = "boto3-1.34.27-py3-none-any.whl", hash = "sha256:3626db4ba9fbb1b58c8fe923da5ed670873b3d881a102956ea19d3b69cd097cc"}, - {file = "boto3-1.34.27.tar.gz", hash = "sha256:ebdd938019f3df2e7b50585353963d4553faf3fbb7b2085c440107fa6caa233b"}, + {file = "boto3-1.34.34-py3-none-any.whl", hash = "sha256:33a8b6d9136fa7427160edb92d2e50f2035f04e9d63a2d1027349053e12626aa"}, + {file = "boto3-1.34.34.tar.gz", hash = "sha256:b2f321e20966f021ec800b7f2c01287a3dd04fc5965acdfbaa9c505a24ca45d1"}, ] [package.dependencies] -botocore = ">=1.34.27,<1.35.0" +botocore = ">=1.34.34,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -351,13 +351,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.27" +version = "1.34.34" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">= 3.8" files = [ - {file = "botocore-1.34.27-py3-none-any.whl", hash = "sha256:1c10f247136ad17b6ef1588c1e043e294dbaebdebe9ce84dc56713029f515c53"}, - {file = "botocore-1.34.27.tar.gz", hash = "sha256:a0e68ba264275b358b8c1cca604161f4d9465cf7847d73e929543a9f30ff22d1"}, + {file = "botocore-1.34.34-py3-none-any.whl", hash = "sha256:cd060b0d88ebb2b893f1411c1db7f2ba66cc18e52dcc57ad029564ef5fec437b"}, + {file = "botocore-1.34.34.tar.gz", hash = "sha256:54093dc97372bb7683f5c61a279aa8240408abf3b2cc494ae82a9a90c1b784b5"}, ] [package.dependencies] @@ -641,63 +641,63 @@ files = [ [[package]] name = "coverage" -version = "7.4.1" +version = "7.4.2" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:077d366e724f24fc02dbfe9d946534357fda71af9764ff99d73c3c596001bbd7"}, - {file = "coverage-7.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0193657651f5399d433c92f8ae264aff31fc1d066deee4b831549526433f3f61"}, - {file = "coverage-7.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d17bbc946f52ca67adf72a5ee783cd7cd3477f8f8796f59b4974a9b59cacc9ee"}, - {file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3277f5fa7483c927fe3a7b017b39351610265308f5267ac6d4c2b64cc1d8d25"}, - {file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dceb61d40cbfcf45f51e59933c784a50846dc03211054bd76b421a713dcdf19"}, - {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6008adeca04a445ea6ef31b2cbaf1d01d02986047606f7da266629afee982630"}, - {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c61f66d93d712f6e03369b6a7769233bfda880b12f417eefdd4f16d1deb2fc4c"}, - {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9bb62fac84d5f2ff523304e59e5c439955fb3b7f44e3d7b2085184db74d733b"}, - {file = "coverage-7.4.1-cp310-cp310-win32.whl", hash = "sha256:f86f368e1c7ce897bf2457b9eb61169a44e2ef797099fb5728482b8d69f3f016"}, - {file = "coverage-7.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:869b5046d41abfea3e381dd143407b0d29b8282a904a19cb908fa24d090cc018"}, - {file = "coverage-7.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8ffb498a83d7e0305968289441914154fb0ef5d8b3157df02a90c6695978295"}, - {file = "coverage-7.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3cacfaefe6089d477264001f90f55b7881ba615953414999c46cc9713ff93c8c"}, - {file = "coverage-7.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d6850e6e36e332d5511a48a251790ddc545e16e8beaf046c03985c69ccb2676"}, - {file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18e961aa13b6d47f758cc5879383d27b5b3f3dcd9ce8cdbfdc2571fe86feb4dd"}, - {file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfd1e1b9f0898817babf840b77ce9fe655ecbe8b1b327983df485b30df8cc011"}, - {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6b00e21f86598b6330f0019b40fb397e705135040dbedc2ca9a93c7441178e74"}, - {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:536d609c6963c50055bab766d9951b6c394759190d03311f3e9fcf194ca909e1"}, - {file = 
"coverage-7.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7ac8f8eb153724f84885a1374999b7e45734bf93a87d8df1e7ce2146860edef6"}, - {file = "coverage-7.4.1-cp311-cp311-win32.whl", hash = "sha256:f3771b23bb3675a06f5d885c3630b1d01ea6cac9e84a01aaf5508706dba546c5"}, - {file = "coverage-7.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:9d2f9d4cc2a53b38cabc2d6d80f7f9b7e3da26b2f53d48f05876fef7956b6968"}, - {file = "coverage-7.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f68ef3660677e6624c8cace943e4765545f8191313a07288a53d3da188bd8581"}, - {file = "coverage-7.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23b27b8a698e749b61809fb637eb98ebf0e505710ec46a8aa6f1be7dc0dc43a6"}, - {file = "coverage-7.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e3424c554391dc9ef4a92ad28665756566a28fecf47308f91841f6c49288e66"}, - {file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0860a348bf7004c812c8368d1fc7f77fe8e4c095d661a579196a9533778e156"}, - {file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe558371c1bdf3b8fa03e097c523fb9645b8730399c14fe7721ee9c9e2a545d3"}, - {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3468cc8720402af37b6c6e7e2a9cdb9f6c16c728638a2ebc768ba1ef6f26c3a1"}, - {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:02f2edb575d62172aa28fe00efe821ae31f25dc3d589055b3fb64d51e52e4ab1"}, - {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ca6e61dc52f601d1d224526360cdeab0d0712ec104a2ce6cc5ccef6ed9a233bc"}, - {file = "coverage-7.4.1-cp312-cp312-win32.whl", hash = "sha256:ca7b26a5e456a843b9b6683eada193fc1f65c761b3a473941efe5a291f604c74"}, - {file = "coverage-7.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:85ccc5fa54c2ed64bd91ed3b4a627b9cce04646a659512a051fa82a92c04a448"}, - {file = "coverage-7.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8bdb0285a0202888d19ec6b6d23d5990410decb932b709f2b0dfe216d031d218"}, - {file = "coverage-7.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:918440dea04521f499721c039863ef95433314b1db00ff826a02580c1f503e45"}, - {file = "coverage-7.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:379d4c7abad5afbe9d88cc31ea8ca262296480a86af945b08214eb1a556a3e4d"}, - {file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b094116f0b6155e36a304ff912f89bbb5067157aff5f94060ff20bbabdc8da06"}, - {file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2f5968608b1fe2a1d00d01ad1017ee27efd99b3437e08b83ded9b7af3f6f766"}, - {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:10e88e7f41e6197ea0429ae18f21ff521d4f4490aa33048f6c6f94c6045a6a75"}, - {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a4a3907011d39dbc3e37bdc5df0a8c93853c369039b59efa33a7b6669de04c60"}, - {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6d224f0c4c9c98290a6990259073f496fcec1b5cc613eecbd22786d398ded3ad"}, - {file = "coverage-7.4.1-cp38-cp38-win32.whl", hash = "sha256:23f5881362dcb0e1a92b84b3c2809bdc90db892332daab81ad8f642d8ed55042"}, - {file = "coverage-7.4.1-cp38-cp38-win_amd64.whl", hash = 
"sha256:a07f61fc452c43cd5328b392e52555f7d1952400a1ad09086c4a8addccbd138d"}, - {file = "coverage-7.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8e738a492b6221f8dcf281b67129510835461132b03024830ac0e554311a5c54"}, - {file = "coverage-7.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:46342fed0fff72efcda77040b14728049200cbba1279e0bf1188f1f2078c1d70"}, - {file = "coverage-7.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9641e21670c68c7e57d2053ddf6c443e4f0a6e18e547e86af3fad0795414a628"}, - {file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aeb2c2688ed93b027eb0d26aa188ada34acb22dceea256d76390eea135083950"}, - {file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d12c923757de24e4e2110cf8832d83a886a4cf215c6e61ed506006872b43a6d1"}, - {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0491275c3b9971cdbd28a4595c2cb5838f08036bca31765bad5e17edf900b2c7"}, - {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8dfc5e195bbef80aabd81596ef52a1277ee7143fe419efc3c4d8ba2754671756"}, - {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1a78b656a4d12b0490ca72651fe4d9f5e07e3c6461063a9b6265ee45eb2bdd35"}, - {file = "coverage-7.4.1-cp39-cp39-win32.whl", hash = "sha256:f90515974b39f4dea2f27c0959688621b46d96d5a626cf9c53dbc653a895c05c"}, - {file = "coverage-7.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:64e723ca82a84053dd7bfcc986bdb34af8d9da83c521c19d6b472bc6880e191a"}, - {file = "coverage-7.4.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:32a8d985462e37cfdab611a6f95b09d7c091d07668fdc26e47a725ee575fe166"}, - {file = "coverage-7.4.1.tar.gz", hash = "sha256:1ed4b95480952b1a26d863e546fa5094564aa0065e1e5f0d4d0041f293251d04"}, + {file = "coverage-7.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bf54c3e089179d9d23900e3efc86d46e4431188d9a657f345410eecdd0151f50"}, + {file = "coverage-7.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fe6e43c8b510719b48af7db9631b5fbac910ade4bd90e6378c85ac5ac706382c"}, + {file = "coverage-7.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b98c89db1b150d851a7840142d60d01d07677a18f0f46836e691c38134ed18b"}, + {file = "coverage-7.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5f9683be6a5b19cd776ee4e2f2ffb411424819c69afab6b2db3a0a364ec6642"}, + {file = "coverage-7.4.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78cdcbf7b9cb83fe047ee09298e25b1cd1636824067166dc97ad0543b079d22f"}, + {file = "coverage-7.4.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2599972b21911111114100d362aea9e70a88b258400672626efa2b9e2179609c"}, + {file = "coverage-7.4.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ef00d31b7569ed3cb2036f26565f1984b9fc08541731ce01012b02a4c238bf03"}, + {file = "coverage-7.4.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:20a875bfd8c282985c4720c32aa05056f77a68e6d8bbc5fe8632c5860ee0b49b"}, + {file = "coverage-7.4.2-cp310-cp310-win32.whl", hash = "sha256:b3f2b1eb229f23c82898eedfc3296137cf1f16bb145ceab3edfd17cbde273fb7"}, + {file = "coverage-7.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:7df95fdd1432a5d2675ce630fef5f239939e2b3610fe2f2b5bf21fa505256fa3"}, + {file = "coverage-7.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash 
= "sha256:a8ddbd158e069dded57738ea69b9744525181e99974c899b39f75b2b29a624e2"}, + {file = "coverage-7.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81a5fb41b0d24447a47543b749adc34d45a2cf77b48ca74e5bf3de60a7bd9edc"}, + {file = "coverage-7.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2412e98e70f16243be41d20836abd5f3f32edef07cbf8f407f1b6e1ceae783ac"}, + {file = "coverage-7.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ddb79414c15c6f03f56cc68fa06994f047cf20207c31b5dad3f6bab54a0f66ef"}, + {file = "coverage-7.4.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf89ab85027427d351f1de918aff4b43f4eb5f33aff6835ed30322a86ac29c9e"}, + {file = "coverage-7.4.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a178b7b1ac0f1530bb28d2e51f88c0bab3e5949835851a60dda80bff6052510c"}, + {file = "coverage-7.4.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:06fe398145a2e91edaf1ab4eee66149c6776c6b25b136f4a86fcbbb09512fd10"}, + {file = "coverage-7.4.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:18cac867950943fe93d6cd56a67eb7dcd2d4a781a40f4c1e25d6f1ed98721a55"}, + {file = "coverage-7.4.2-cp311-cp311-win32.whl", hash = "sha256:f72cdd2586f9a769570d4b5714a3837b3a59a53b096bb954f1811f6a0afad305"}, + {file = "coverage-7.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:d779a48fac416387dd5673fc5b2d6bd903ed903faaa3247dc1865c65eaa5a93e"}, + {file = "coverage-7.4.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:adbdfcda2469d188d79771d5696dc54fab98a16d2ef7e0875013b5f56a251047"}, + {file = "coverage-7.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ac4bab32f396b03ebecfcf2971668da9275b3bb5f81b3b6ba96622f4ef3f6e17"}, + {file = "coverage-7.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:006d220ba2e1a45f1de083d5022d4955abb0aedd78904cd5a779b955b019ec73"}, + {file = "coverage-7.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3733545eb294e5ad274abe131d1e7e7de4ba17a144505c12feca48803fea5f64"}, + {file = "coverage-7.4.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42a9e754aa250fe61f0f99986399cec086d7e7a01dd82fd863a20af34cbce962"}, + {file = "coverage-7.4.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2ed37e16cf35c8d6e0b430254574b8edd242a367a1b1531bd1adc99c6a5e00fe"}, + {file = "coverage-7.4.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:b953275d4edfab6cc0ed7139fa773dfb89e81fee1569a932f6020ce7c6da0e8f"}, + {file = "coverage-7.4.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32b4ab7e6c924f945cbae5392832e93e4ceb81483fd6dc4aa8fb1a97b9d3e0e1"}, + {file = "coverage-7.4.2-cp312-cp312-win32.whl", hash = "sha256:f5df76c58977bc35a49515b2fbba84a1d952ff0ec784a4070334dfbec28a2def"}, + {file = "coverage-7.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:34423abbaad70fea9d0164add189eabaea679068ebdf693baa5c02d03e7db244"}, + {file = "coverage-7.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5b11f9c6587668e495cc7365f85c93bed34c3a81f9f08b0920b87a89acc13469"}, + {file = "coverage-7.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:51593a1f05c39332f623d64d910445fdec3d2ac2d96b37ce7f331882d5678ddf"}, + {file = "coverage-7.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:69f1665165ba2fe7614e2f0c1aed71e14d83510bf67e2ee13df467d1c08bf1e8"}, + {file = "coverage-7.4.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3c8bbb95a699c80a167478478efe5e09ad31680931ec280bf2087905e3b95ec"}, + {file = "coverage-7.4.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:175f56572f25e1e1201d2b3e07b71ca4d201bf0b9cb8fad3f1dfae6a4188de86"}, + {file = "coverage-7.4.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8562ca91e8c40864942615b1d0b12289d3e745e6b2da901d133f52f2d510a1e3"}, + {file = "coverage-7.4.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d9a1ef0f173e1a19738f154fb3644f90d0ada56fe6c9b422f992b04266c55d5a"}, + {file = "coverage-7.4.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f40ac873045db4fd98a6f40387d242bde2708a3f8167bd967ccd43ad46394ba2"}, + {file = "coverage-7.4.2-cp38-cp38-win32.whl", hash = "sha256:d1b750a8409bec61caa7824bfd64a8074b6d2d420433f64c161a8335796c7c6b"}, + {file = "coverage-7.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:b4ae777bebaed89e3a7e80c4a03fac434a98a8abb5251b2a957d38fe3fd30088"}, + {file = "coverage-7.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3ff7f92ae5a456101ca8f48387fd3c56eb96353588e686286f50633a611afc95"}, + {file = "coverage-7.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:861d75402269ffda0b33af94694b8e0703563116b04c681b1832903fac8fd647"}, + {file = "coverage-7.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3507427d83fa961cbd73f11140f4a5ce84208d31756f7238d6257b2d3d868405"}, + {file = "coverage-7.4.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bf711d517e21fb5bc429f5c4308fbc430a8585ff2a43e88540264ae87871e36a"}, + {file = "coverage-7.4.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c00e54f0bd258ab25e7f731ca1d5144b0bf7bec0051abccd2bdcff65fa3262c9"}, + {file = "coverage-7.4.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f8e845d894e39fb53834da826078f6dc1a933b32b1478cf437007367efaf6f6a"}, + {file = "coverage-7.4.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:840456cb1067dc350af9080298c7c2cfdddcedc1cb1e0b30dceecdaf7be1a2d3"}, + {file = "coverage-7.4.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c11ca2df2206a4e3e4c4567f52594637392ed05d7c7fb73b4ea1c658ba560265"}, + {file = "coverage-7.4.2-cp39-cp39-win32.whl", hash = "sha256:3ff5bdb08d8938d336ce4088ca1a1e4b6c8cd3bef8bb3a4c0eb2f37406e49643"}, + {file = "coverage-7.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:ac9e95cefcf044c98d4e2c829cd0669918585755dd9a92e28a1a7012322d0a95"}, + {file = "coverage-7.4.2-pp38.pp39.pp310-none-any.whl", hash = "sha256:f593a4a90118d99014517c2679e04a4ef5aee2d81aa05c26c734d271065efcb6"}, + {file = "coverage-7.4.2.tar.gz", hash = "sha256:1a5ee18e3a8d766075ce9314ed1cb695414bae67df6a4b0805f5137d93d6f1cb"}, ] [package.dependencies] @@ -708,43 +708,43 @@ toml = ["tomli"] [[package]] name = "cryptography" -version = "42.0.0" +version = "42.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-42.0.0-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:c640b0ef54138fde761ec99a6c7dc4ce05e80420262c20fa239e694ca371d434"}, - {file = "cryptography-42.0.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:678cfa0d1e72ef41d48993a7be75a76b0725d29b820ff3cfd606a5b2b33fda01"}, - {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:146e971e92a6dd042214b537a726c9750496128453146ab0ee8971a0299dc9bd"}, - {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87086eae86a700307b544625e3ba11cc600c3c0ef8ab97b0fda0705d6db3d4e3"}, - {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0a68bfcf57a6887818307600c3c0ebc3f62fbb6ccad2240aa21887cda1f8df1b"}, - {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:5a217bca51f3b91971400890905a9323ad805838ca3fa1e202a01844f485ee87"}, - {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:ca20550bb590db16223eb9ccc5852335b48b8f597e2f6f0878bbfd9e7314eb17"}, - {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:33588310b5c886dfb87dba5f013b8d27df7ffd31dc753775342a1e5ab139e59d"}, - {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9515ea7f596c8092fdc9902627e51b23a75daa2c7815ed5aa8cf4f07469212ec"}, - {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:35cf6ed4c38f054478a9df14f03c1169bb14bd98f0b1705751079b25e1cb58bc"}, - {file = "cryptography-42.0.0-cp37-abi3-win32.whl", hash = "sha256:8814722cffcfd1fbd91edd9f3451b88a8f26a5fd41b28c1c9193949d1c689dc4"}, - {file = "cryptography-42.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:a2a8d873667e4fd2f34aedab02ba500b824692c6542e017075a2efc38f60a4c0"}, - {file = "cryptography-42.0.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:8fedec73d590fd30c4e3f0d0f4bc961aeca8390c72f3eaa1a0874d180e868ddf"}, - {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be41b0c7366e5549265adf2145135dca107718fa44b6e418dc7499cfff6b4689"}, - {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca482ea80626048975360c8e62be3ceb0f11803180b73163acd24bf014133a0"}, - {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c58115384bdcfe9c7f644c72f10f6f42bed7cf59f7b52fe1bf7ae0a622b3a139"}, - {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:56ce0c106d5c3fec1038c3cca3d55ac320a5be1b44bf15116732d0bc716979a2"}, - {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:324721d93b998cb7367f1e6897370644751e5580ff9b370c0a50dc60a2003513"}, - {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:d97aae66b7de41cdf5b12087b5509e4e9805ed6f562406dfcf60e8481a9a28f8"}, - {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:85f759ed59ffd1d0baad296e72780aa62ff8a71f94dc1ab340386a1207d0ea81"}, - {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:206aaf42e031b93f86ad60f9f5d9da1b09164f25488238ac1dc488334eb5e221"}, - {file = "cryptography-42.0.0-cp39-abi3-win32.whl", hash = "sha256:74f18a4c8ca04134d2052a140322002fef535c99cdbc2a6afc18a8024d5c9d5b"}, - {file = "cryptography-42.0.0-cp39-abi3-win_amd64.whl", hash = 
"sha256:14e4b909373bc5bf1095311fa0f7fcabf2d1a160ca13f1e9e467be1ac4cbdf94"}, - {file = "cryptography-42.0.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3005166a39b70c8b94455fdbe78d87a444da31ff70de3331cdec2c568cf25b7e"}, - {file = "cryptography-42.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:be14b31eb3a293fc6e6aa2807c8a3224c71426f7c4e3639ccf1a2f3ffd6df8c3"}, - {file = "cryptography-42.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:bd7cf7a8d9f34cc67220f1195884151426ce616fdc8285df9054bfa10135925f"}, - {file = "cryptography-42.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c310767268d88803b653fffe6d6f2f17bb9d49ffceb8d70aed50ad45ea49ab08"}, - {file = "cryptography-42.0.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bdce70e562c69bb089523e75ef1d9625b7417c6297a76ac27b1b8b1eb51b7d0f"}, - {file = "cryptography-42.0.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e9326ca78111e4c645f7e49cbce4ed2f3f85e17b61a563328c85a5208cf34440"}, - {file = "cryptography-42.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:69fd009a325cad6fbfd5b04c711a4da563c6c4854fc4c9544bff3088387c77c0"}, - {file = "cryptography-42.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:988b738f56c665366b1e4bfd9045c3efae89ee366ca3839cd5af53eaa1401bce"}, - {file = "cryptography-42.0.0.tar.gz", hash = "sha256:6cf9b76d6e93c62114bd19485e5cb003115c134cf9ce91f8ac924c44f8c8c3f4"}, + {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be"}, + {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2"}, + {file = "cryptography-42.0.2-cp37-abi3-win32.whl", hash = "sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee"}, + {file = "cryptography-42.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee"}, + {file = "cryptography-42.0.2-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33"}, + {file = "cryptography-42.0.2-cp39-abi3-win32.whl", hash = "sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635"}, + {file = "cryptography-42.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65"}, + {file = "cryptography-42.0.2.tar.gz", hash = "sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888"}, ] [package.dependencies] @@ -883,70 +883,60 @@ files = [ [[package]] name = "duckdb" -version = "0.9.2" -description = "DuckDB embedded database" +version = "0.10.0" +description = "DuckDB in-process database" optional = true python-versions = ">=3.7.0" files = [ - {file = "duckdb-0.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:aadcea5160c586704c03a8a796c06a8afffbefefb1986601104a60cb0bfdb5ab"}, - {file = "duckdb-0.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:08215f17147ed83cbec972175d9882387366de2ed36c21cbe4add04b39a5bcb4"}, - {file = "duckdb-0.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ee6c2a8aba6850abef5e1be9dbc04b8e72a5b2c2b67f77892317a21fae868fe7"}, - {file = 
"duckdb-0.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ff49f3da9399900fd58b5acd0bb8bfad22c5147584ad2427a78d937e11ec9d0"}, - {file = "duckdb-0.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd5ac5baf8597efd2bfa75f984654afcabcd698342d59b0e265a0bc6f267b3f0"}, - {file = "duckdb-0.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:81c6df905589a1023a27e9712edb5b724566587ef280a0c66a7ec07c8083623b"}, - {file = "duckdb-0.9.2-cp310-cp310-win32.whl", hash = "sha256:a298cd1d821c81d0dec8a60878c4b38c1adea04a9675fb6306c8f9083bbf314d"}, - {file = "duckdb-0.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:492a69cd60b6cb4f671b51893884cdc5efc4c3b2eb76057a007d2a2295427173"}, - {file = "duckdb-0.9.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:061a9ea809811d6e3025c5de31bc40e0302cfb08c08feefa574a6491e882e7e8"}, - {file = "duckdb-0.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a43f93be768af39f604b7b9b48891f9177c9282a408051209101ff80f7450d8f"}, - {file = "duckdb-0.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ac29c8c8f56fff5a681f7bf61711ccb9325c5329e64f23cb7ff31781d7b50773"}, - {file = "duckdb-0.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b14d98d26bab139114f62ade81350a5342f60a168d94b27ed2c706838f949eda"}, - {file = "duckdb-0.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:796a995299878913e765b28cc2b14c8e44fae2f54ab41a9ee668c18449f5f833"}, - {file = "duckdb-0.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6cb64ccfb72c11ec9c41b3cb6181b6fd33deccceda530e94e1c362af5f810ba1"}, - {file = "duckdb-0.9.2-cp311-cp311-win32.whl", hash = "sha256:930740cb7b2cd9e79946e1d3a8f66e15dc5849d4eaeff75c8788d0983b9256a5"}, - {file = "duckdb-0.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:c28f13c45006fd525001b2011cdf91fa216530e9751779651e66edc0e446be50"}, - {file = "duckdb-0.9.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fbce7bbcb4ba7d99fcec84cec08db40bc0dd9342c6c11930ce708817741faeeb"}, - {file = "duckdb-0.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15a82109a9e69b1891f0999749f9e3265f550032470f51432f944a37cfdc908b"}, - {file = "duckdb-0.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9490fb9a35eb74af40db5569d90df8a04a6f09ed9a8c9caa024998c40e2506aa"}, - {file = "duckdb-0.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:696d5c6dee86c1a491ea15b74aafe34ad2b62dcd46ad7e03b1d00111ca1a8c68"}, - {file = "duckdb-0.9.2-cp37-cp37m-win32.whl", hash = "sha256:4f0935300bdf8b7631ddfc838f36a858c1323696d8c8a2cecbd416bddf6b0631"}, - {file = "duckdb-0.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:0aab900f7510e4d2613263865570203ddfa2631858c7eb8cbed091af6ceb597f"}, - {file = "duckdb-0.9.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:7d8130ed6a0c9421b135d0743705ea95b9a745852977717504e45722c112bf7a"}, - {file = "duckdb-0.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:974e5de0294f88a1a837378f1f83330395801e9246f4e88ed3bfc8ada65dcbee"}, - {file = "duckdb-0.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4fbc297b602ef17e579bb3190c94d19c5002422b55814421a0fc11299c0c1100"}, - {file = "duckdb-0.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1dd58a0d84a424924a35b3772419f8cd78a01c626be3147e4934d7a035a8ad68"}, - {file = "duckdb-0.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:11a1194a582c80dfb57565daa06141727e415ff5d17e022dc5f31888a5423d33"}, - {file = "duckdb-0.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:be45d08541002a9338e568dca67ab4f20c0277f8f58a73dfc1435c5b4297c996"}, - {file = "duckdb-0.9.2-cp38-cp38-win32.whl", hash = "sha256:dd6f88aeb7fc0bfecaca633629ff5c986ac966fe3b7dcec0b2c48632fd550ba2"}, - {file = "duckdb-0.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:28100c4a6a04e69aa0f4a6670a6d3d67a65f0337246a0c1a429f3f28f3c40b9a"}, - {file = "duckdb-0.9.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7ae5bf0b6ad4278e46e933e51473b86b4b932dbc54ff097610e5b482dd125552"}, - {file = "duckdb-0.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e5d0bb845a80aa48ed1fd1d2d285dd352e96dc97f8efced2a7429437ccd1fe1f"}, - {file = "duckdb-0.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ce262d74a52500d10888110dfd6715989926ec936918c232dcbaddb78fc55b4"}, - {file = "duckdb-0.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6935240da090a7f7d2666f6d0a5e45ff85715244171ca4e6576060a7f4a1200e"}, - {file = "duckdb-0.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5cfb93e73911696a98b9479299d19cfbc21dd05bb7ab11a923a903f86b4d06e"}, - {file = "duckdb-0.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:64e3bc01751f31e7572d2716c3e8da8fe785f1cdc5be329100818d223002213f"}, - {file = "duckdb-0.9.2-cp39-cp39-win32.whl", hash = "sha256:6e5b80f46487636368e31b61461940e3999986359a78660a50dfdd17dd72017c"}, - {file = "duckdb-0.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:e6142a220180dbeea4f341708bd5f9501c5c962ce7ef47c1cadf5e8810b4cb13"}, - {file = "duckdb-0.9.2.tar.gz", hash = "sha256:3843afeab7c3fc4a4c0b53686a4cc1d9cdbdadcbb468d60fef910355ecafd447"}, -] - -[[package]] -name = "ecdsa" -version = "0.18.0" -description = "ECDSA cryptographic signature library (pure python)" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "ecdsa-0.18.0-py2.py3-none-any.whl", hash = "sha256:80600258e7ed2f16b9aa1d7c295bd70194109ad5a30fdee0eaeefef1d4c559dd"}, - {file = "ecdsa-0.18.0.tar.gz", hash = "sha256:190348041559e21b22a1d65cee485282ca11a6f81d503fddb84d5017e9ed1e49"}, + {file = "duckdb-0.10.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bd0ffb3fddef0f72a150e4d76e10942a84a1a0447d10907df1621b90d6668060"}, + {file = "duckdb-0.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f3d709d5c7c1a12b5e10d0b05fa916c670cd2b50178e3696faa0cc16048a1745"}, + {file = "duckdb-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9114aa22ec5d591a20ce5184be90f49d8e5b5348ceaab21e102c54560d07a5f8"}, + {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77a37877efadf39caf7cadde0f430fedf762751b9c54750c821e2f1316705a21"}, + {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87cbc9e1d9c3fc9f14307bea757f99f15f46843c0ab13a6061354410824ed41f"}, + {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f0bfec79fed387201550517d325dff4fad2705020bc139d936cab08b9e845662"}, + {file = "duckdb-0.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c5622134d2d9796b15e09de810e450859d4beb46d9b861357ec9ae40a61b775c"}, + {file = "duckdb-0.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:089ee8e831ccaef1b73fc89c43b661567175eed0115454880bafed5e35cda702"}, + {file = "duckdb-0.10.0-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:a05af63747f1d7021995f0811c333dee7316cec3b06c0d3e4741b9bdb678dd21"}, + {file = "duckdb-0.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:072d6eba5d8a59e0069a8b5b4252fed8a21f9fe3f85a9129d186a39b3d0aea03"}, + {file = "duckdb-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a77b85668f59b919042832e4659538337f1c7f197123076c5311f1c9cf077df7"}, + {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96a666f1d2da65d03199a977aec246920920a5ea1da76b70ae02bd4fb1ffc48c"}, + {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ec76a4262b783628d26612d184834852d9c92fb203e91af789100c17e3d7173"}, + {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009dd9d2cdbd3b061a9efbdfc79f2d1a8377bcf49f1e5f430138621f8c083a6c"}, + {file = "duckdb-0.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:878f06766088090dad4a2e5ee0081555242b2e8dcb29415ecc97e388cf0cf8d8"}, + {file = "duckdb-0.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:713ff0a1fb63a6d60f454acf67f31656549fb5d63f21ac68314e4f522daa1a89"}, + {file = "duckdb-0.10.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9c0ee450dfedfb52dd4957244e31820feef17228da31af6d052979450a80fd19"}, + {file = "duckdb-0.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ff79b2ea9994398b545c0d10601cd73565fbd09f8951b3d8003c7c5c0cebc7cb"}, + {file = "duckdb-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6bdf1aa71b924ef651062e6b8ff9981ad85bec89598294af8a072062c5717340"}, + {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0265bbc8216be3ced7b377ba8847128a3fc0ef99798a3c4557c1b88e3a01c23"}, + {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d418a315a07707a693bd985274c0f8c4dd77015d9ef5d8d3da4cc1942fd82e0"}, + {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2828475a292e68c71855190b818aded6bce7328f79e38c04a0c75f8f1c0ceef0"}, + {file = "duckdb-0.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c3aaeaae2eba97035c65f31ffdb18202c951337bf2b3d53d77ce1da8ae2ecf51"}, + {file = "duckdb-0.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:c51790aaaea97d8e4a58a114c371ed8d2c4e1ca7cbf29e3bdab6d8ccfc5afc1e"}, + {file = "duckdb-0.10.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8af1ae7cc77a12206b6c47ade191882cc8f49f750bb3e72bb86ac1d4fa89926a"}, + {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa4f7e8e8dc0e376aeb280b83f2584d0e25ec38985c27d19f3107b2edc4f4a97"}, + {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28ae942a79fad913defa912b56483cd7827a4e7721f4ce4bc9025b746ecb3c89"}, + {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:01b57802898091455ca2a32c1335aac1e398da77c99e8a96a1e5de09f6a0add9"}, + {file = "duckdb-0.10.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:52e1ad4a55fa153d320c367046b9500578192e01c6d04308ba8b540441736f2c"}, + {file = "duckdb-0.10.0-cp37-cp37m-win_amd64.whl", hash = "sha256:904c47d04095af745e989c853f0bfc0776913dfc40dfbd2da7afdbbb5f67fed0"}, + {file = "duckdb-0.10.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:184ae7ea5874f3b8fa51ab0f1519bdd088a0b78c32080ee272b1d137e2c8fd9c"}, + {file = "duckdb-0.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:bd33982ecc9bac727a032d6cedced9f19033cbad56647147408891eb51a6cb37"}, + {file = "duckdb-0.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f59bf0949899105dd5f8864cb48139bfb78454a8c017b8258ba2b5e90acf7afc"}, + {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:395f3b18948001e35dceb48a4423d574e38656606d033eef375408b539e7b076"}, + {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b8eb2b803be7ee1df70435c33b03a4598cdaf676cd67ad782b288dcff65d781"}, + {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:31b2ddd331801064326c8e3587a4db8a31d02aef11332c168f45b3bd92effb41"}, + {file = "duckdb-0.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c8b89e76a041424b8c2026c5dc1f74b53fbbc6c6f650d563259885ab2e7d093d"}, + {file = "duckdb-0.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:79084a82f16c0a54f6bfb7ded5600400c2daa90eb0d83337d81a56924eaee5d4"}, + {file = "duckdb-0.10.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:79799b3a270dcd9070f677ba510f1e66b112df3068425691bac97c5e278929c7"}, + {file = "duckdb-0.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8fc394bfe3434920cdbcfbdd0ac3ba40902faa1dbda088db0ba44003a45318a"}, + {file = "duckdb-0.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c116605551b4abf5786243a59bcef02bd69cc51837d0c57cafaa68cdc428aa0c"}, + {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3191170c3b0a43b0c12644800326f5afdea00d5a4621d59dbbd0c1059139e140"}, + {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fee69a50eb93c72dc77e7ab1fabe0c38d21a52c5da44a86aa217081e38f9f1bd"}, + {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c5f449e87dacb16b0d145dbe65fa6fdb5a55b2b6911a46d74876e445dd395bac"}, + {file = "duckdb-0.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4487d0df221b17ea4177ad08131bc606b35f25cfadf890987833055b9d10cdf6"}, + {file = "duckdb-0.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:c099ae2ff8fe939fda62da81704f91e2f92ac45e48dc0e37c679c9d243d01e65"}, + {file = "duckdb-0.10.0.tar.gz", hash = "sha256:c02bcc128002aa79e3c9d89b9de25e062d1096a8793bc0d7932317b7977f6845"}, ] -[package.dependencies] -six = ">=1.9.0" - -[package.extras] -gmpy = ["gmpy"] -gmpy2 = ["gmpy2"] - [[package]] name = "exceptiongroup" version = "1.2.0" @@ -963,42 +953,42 @@ test = ["pytest (>=6)"] [[package]] name = "fastavro" -version = "1.9.3" +version = "1.9.4" description = "Fast read/write of AVRO files" optional = false python-versions = ">=3.8" files = [ - {file = "fastavro-1.9.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:5e9b2e1427fb84c0754bc34923d10cabcf2ed23230201208a1371ab7b6027674"}, - {file = "fastavro-1.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4ef82f86ae276309abc0072598474b6be68105a0b28f8d7cc0398d1d353d7de"}, - {file = "fastavro-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:280ef7ab7232ecb2097038d6842416ec717d0e1c314b80ff245f85201f3396a4"}, - {file = "fastavro-1.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4a36cfc0421ed7576ecb1c22de7bd1dedcce62aebbffcc597379d59171e5d76e"}, - {file = "fastavro-1.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d80f2e20199140eb8c036b4393e9bc9eff325543311b958c72318999499d4279"}, - {file = "fastavro-1.9.3-cp310-cp310-win_amd64.whl", hash = 
"sha256:a435f7edd7c5b52cee3f23ca950cd9373ab35cf2aa3d269b3d6aca7e2fc1372c"}, - {file = "fastavro-1.9.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2a7053ed10194ec53754f5337b57b3273a74b48505edcd6edb79fe3c4cd259c0"}, - {file = "fastavro-1.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853e01f13534d1baa0a3d493a8573e665e93ffa35b4bf1d125e21764d343af8e"}, - {file = "fastavro-1.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5a279cda25d876e6f120950cadf184a307fd8998f9a22a90bb62e6749f88d1e"}, - {file = "fastavro-1.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:63d6f928840f3fb1f2e1fe20bc8b7d0e1a51ba4bb0e554ecb837a669fba31288"}, - {file = "fastavro-1.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8807046edc78f50b3ea5f55f6a534c87b2a13538e7c56fec3532ef802bcae333"}, - {file = "fastavro-1.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:e502579da4a51c5630eadbd811a1b3d262d6e783bf19998cfb33d2ea0cf6f516"}, - {file = "fastavro-1.9.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:6b665efe442061df8d9608c2fb692847df85d52ad825b776c441802f0dfa6571"}, - {file = "fastavro-1.9.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b8c96d81f0115633489d7f1133a03832922629a61ca81c1d47b482ddcda3b94"}, - {file = "fastavro-1.9.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:338c7ec94dd2474c4679e44d2560a1922cb6fa99acbb7b18957264baf8eadfc7"}, - {file = "fastavro-1.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a509b34c9af71a109c633631ac2f6d2209830e13200d0048f7e9c057fd563f8f"}, - {file = "fastavro-1.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:967edefab470987c024cd5a1fcd04744a50a91e740c7bdf325181043a47f1083"}, - {file = "fastavro-1.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:033c15e8ed02f80f01d58be1cd880b09fd444faf277263d563a727711d47a98a"}, - {file = "fastavro-1.9.3-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:6b38723327603d77080aec56628e13a739415f8596ca0cc41a905615977c6d6b"}, - {file = "fastavro-1.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:046d75c4400941fd08f0a6855a34ae63bf02ea01f366b5b749942abe10640056"}, - {file = "fastavro-1.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87ab312b8baf0e61ee717878d390022ee1b713d70b244d69efbf3325680f9749"}, - {file = "fastavro-1.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c562fcf8f5091a2446aafd0c2a0da590c24e0b53527a0100d33908e32f20eea8"}, - {file = "fastavro-1.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2aa0111e7ebd076d2a094862bbdf8ea175cebba148fcce6c89ff46b625e334b4"}, - {file = "fastavro-1.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:652072e0f455ca19a1ee502b527e603389783657c130d81f89df66775979d6f5"}, - {file = "fastavro-1.9.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:0a57cdd4edaee36d4216faf801ebc7f53f45e4e1518bdd9832d6f6f1d6e2d88f"}, - {file = "fastavro-1.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b46a18ebed61573b0823c28eda2716485d283258a83659c7fe6ad3aaeacfed4"}, - {file = "fastavro-1.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f756f0723f3bd97db20437d0a8e45712839e6ccd7c82f4d82469533be48b4c7"}, - {file = "fastavro-1.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d98d5a08063f5b6d7ac5016a0dfe0698b50d9987cb74686f7dfa8288b7b09e0b"}, - {file = "fastavro-1.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:00698e60db58a2d52cb709df882d451fb7664ebb2f8cb37d9171697e060dc767"}, - {file = "fastavro-1.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:d021bbc135023194688e88a7431fb0b5e3ce20e27153bf258f2ce08ee1a0106b"}, - {file = "fastavro-1.9.3.tar.gz", hash = "sha256:a30d3d2353f6d3b4f6dcd6a97ae937b3775faddd63f5856fe11ba3b0dbb1756a"}, + {file = "fastavro-1.9.4-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:60cb38f07462a7fb4e4440ed0de67d3d400ae6b3d780f81327bebde9aa55faef"}, + {file = "fastavro-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:063d01d197fc929c20adc09ca9f0ca86d33ac25ee0963ce0b438244eee8315ae"}, + {file = "fastavro-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87a9053fcfbc895f2a16a4303af22077e3a8fdcf1cd5d6ed47ff2ef22cbba2f0"}, + {file = "fastavro-1.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:02bf1276b7326397314adf41b34a4890f6ffa59cf7e0eb20b9e4ab0a143a1598"}, + {file = "fastavro-1.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:56bed9eca435389a8861e6e2d631ec7f8f5dda5b23f93517ac710665bd34ca29"}, + {file = "fastavro-1.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:0cd2099c8c672b853e0b20c13e9b62a69d3fbf67ee7c59c7271ba5df1680310d"}, + {file = "fastavro-1.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:af8c6d8c43a02b5569c093fc5467469541ac408c79c36a5b0900d3dd0b3ba838"}, + {file = "fastavro-1.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4a138710bd61580324d23bc5e3df01f0b82aee0a76404d5dddae73d9e4c723f"}, + {file = "fastavro-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:903d97418120ca6b6a7f38a731166c1ccc2c4344ee5e0470d09eb1dc3687540a"}, + {file = "fastavro-1.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c443eeb99899d062dbf78c525e4614dd77e041a7688fa2710c224f4033f193ae"}, + {file = "fastavro-1.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ac26ab0774d1b2b7af6d8f4300ad20bbc4b5469e658a02931ad13ce23635152f"}, + {file = "fastavro-1.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:cf7247874c22be856ba7d1f46a0f6e0379a6025f1a48a7da640444cbac6f570b"}, + {file = "fastavro-1.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:68912f2020e1b3d70557260b27dd85fb49a4fc6bfab18d384926127452c1da4c"}, + {file = "fastavro-1.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6925ce137cdd78e109abdb0bc33aad55de6c9f2d2d3036b65453128f2f5f5b92"}, + {file = "fastavro-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b928cd294e36e35516d0deb9e104b45be922ba06940794260a4e5dbed6c192a"}, + {file = "fastavro-1.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:90c9838bc4c991ffff5dd9d88a0cc0030f938b3fdf038cdf6babde144b920246"}, + {file = "fastavro-1.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:eca6e54da571b06a3c5a72dbb7212073f56c92a6fbfbf847b91c347510f8a426"}, + {file = "fastavro-1.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:a4b02839ac261100cefca2e2ad04cdfedc556cb66b5ec735e0db428e74b399de"}, + {file = "fastavro-1.9.4-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:4451ee9a305a73313a1558d471299f3130e4ecc10a88bf5742aa03fb37e042e6"}, + {file = "fastavro-1.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8524fccfb379565568c045d29b2ebf71e1f2c0dd484aeda9fe784ef5febe1a8"}, + {file = "fastavro-1.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:33d0a00a6e09baa20f6f038d7a2ddcb7eef0e7a9980e947a018300cb047091b8"}, + {file = "fastavro-1.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:23d7e5b29c9bf6f26e8be754b2c8b919838e506f78ef724de7d22881696712fc"}, + {file = "fastavro-1.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2e6ab3ee53944326460edf1125b2ad5be2fadd80f7211b13c45fa0c503b4cf8d"}, + {file = "fastavro-1.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:64d335ec2004204c501f8697c385d0a8f6b521ac82d5b30696f789ff5bc85f3c"}, + {file = "fastavro-1.9.4-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:7e05f44c493e89e73833bd3ff3790538726906d2856f59adc8103539f4a1b232"}, + {file = "fastavro-1.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:253c63993250bff4ee7b11fb46cf3a4622180a783bedc82a24c6fdcd1b10ca2a"}, + {file = "fastavro-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24d6942eb1db14640c2581e0ecd1bbe0afc8a83731fcd3064ae7f429d7880cb7"}, + {file = "fastavro-1.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d47bb66be6091cd48cfe026adcad11c8b11d7d815a2949a1e4ccf03df981ca65"}, + {file = "fastavro-1.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c293897f12f910e58a1024f9c77f565aa8e23b36aafda6ad8e7041accc57a57f"}, + {file = "fastavro-1.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:f05d2afcb10a92e2a9e580a3891f090589b3e567fdc5641f8a46a0b084f120c3"}, + {file = "fastavro-1.9.4.tar.gz", hash = "sha256:56b8363e360a1256c94562393dc7f8611f3baf2b3159f64fb2b9c6b87b14e876"}, ] [package.extras] @@ -1157,6 +1147,10 @@ files = [ {file = "fsspec-2023.12.2.tar.gz", hash = "sha256:8548d39e8810b59c38014934f6b31e57f40c1b20f911f4cc2b85389c7e9bf0cb"}, ] +[package.dependencies] +aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""} +requests = {version = "*", optional = true, markers = "extra == \"http\""} + [package.extras] abfs = ["adlfs"] adl = ["adlfs"] @@ -1205,6 +1199,38 @@ requests = "*" crc = ["crcmod"] gcsfuse = ["fusepy"] +[[package]] +name = "getdaft" +version = "0.2.16" +description = "Distributed Dataframes for Multimodal Data" +optional = true +python-versions = ">=3.7" +files = [ + {file = "getdaft-0.2.16-cp37-abi3-macosx_10_7_x86_64.whl", hash = "sha256:0a355301e79e00ab639150b84d380465f5f69ef9e6f36f1b5cf376e3d24229f6"}, + {file = "getdaft-0.2.16-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:317a8dff8169638cea40efbc01193d51f31c4ab441fc39f01f163f197fc264a2"}, + {file = "getdaft-0.2.16-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb1e5a62fcbb4a909532bb64dc7af56e7ac3fef1b8220448fcae1a8af0c6bc4"}, + {file = "getdaft-0.2.16-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b932510fd24b4f1f021abd67016bbcdacda3315d0e3ee2a8e339d82719adbd51"}, + {file = "getdaft-0.2.16-cp37-abi3-win_amd64.whl", hash = "sha256:c3e74f56b211f88e5c335276fe4670a0dfac8dc8b5c684b22fc570b1350cc40d"}, + {file = "getdaft-0.2.16.tar.gz", hash = "sha256:3fc7b2c3373bc374a90ecc566c6f0d830b9ce751d6c930c96b70b2c4c2afa0c4"}, +] + +[package.dependencies] +fsspec = {version = "*", extras = ["http"]} +psutil = "*" +pyarrow = ">=6.0.1" +tqdm = "*" +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +all = ["getdaft[aws,azure,gcp,iceberg,numpy,pandas,ray]"] +aws = ["s3fs"] +azure = ["adlfs"] +gcp = ["gcsfs"] +iceberg = ["packaging", "pyiceberg (>=0.4.0)"] +numpy = ["numpy"] +pandas = ["pandas"] +ray = ["packaging", "ray[client,data] 
(>=2.0.0)"] + [[package]] name = "google-api-core" version = "2.15.0" @@ -1631,6 +1657,23 @@ files = [ {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, ] +[[package]] +name = "joserfc" +version = "0.9.0" +description = "The ultimate Python library for JOSE RFCs, including JWS, JWE, JWK, JWA, JWT" +optional = false +python-versions = ">=3.8" +files = [ + {file = "joserfc-0.9.0-py3-none-any.whl", hash = "sha256:4026bdbe2c196cd40574e916fa1e28874d99649412edaab0e373dec3077153fb"}, + {file = "joserfc-0.9.0.tar.gz", hash = "sha256:eebca7f587b1761ce43a98ffd5327f2b600b9aa5bb0a77b947687f503ad43bc0"}, +] + +[package.dependencies] +cryptography = "*" + +[package.extras] +drafts = ["pycryptodome"] + [[package]] name = "jschema-to-python" version = "1.2.3" @@ -1963,65 +2006,57 @@ files = [ [[package]] name = "moto" -version = "4.2.13" +version = "5.0.2" description = "" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "moto-4.2.13-py2.py3-none-any.whl", hash = "sha256:93e0fd13b624bd79115494f833308c3641b2be0fc9f4f18aa9264aa01f6168e0"}, - {file = "moto-4.2.13.tar.gz", hash = "sha256:01aef6a489a725c8d725bd3dc6f70ff1bedaee3e2641752e4b471ff0ede4b4d7"}, + {file = "moto-5.0.2-py2.py3-none-any.whl", hash = "sha256:71bb832a18b64f10fc4cec117b9b0e2305e5831d9a17eb74f6b9819ed7613843"}, + {file = "moto-5.0.2.tar.gz", hash = "sha256:7e27395e5c63ff9554ae14b5baa41bfe6d6b1be0e59eb02977c6ce28411246de"}, ] [package.dependencies] aws-xray-sdk = {version = ">=0.93,<0.96 || >0.96", optional = true, markers = "extra == \"server\""} boto3 = ">=1.9.201" -botocore = ">=1.12.201" +botocore = ">=1.14.0" cfn-lint = {version = ">=0.40.0", optional = true, markers = "extra == \"server\""} cryptography = ">=3.3.1" docker = {version = ">=3.0.0", optional = true, markers = "extra == \"server\""} -ecdsa = {version = "!=0.15", optional = true, markers = "extra == \"server\""} flask = {version = "<2.2.0 || >2.2.0,<2.2.1 || >2.2.1", optional = true, markers = "extra == \"server\""} flask-cors = {version = "*", optional = true, markers = "extra == \"server\""} graphql-core = {version = "*", optional = true, markers = "extra == \"server\""} Jinja2 = ">=2.10.1" +joserfc = {version = ">=0.9.0", optional = true, markers = "extra == \"server\""} jsondiff = {version = ">=1.1.2", optional = true, markers = "extra == \"server\""} openapi-spec-validator = {version = ">=0.5.0", optional = true, markers = "extra == \"server\""} -py-partiql-parser = {version = "0.5.0", optional = true, markers = "extra == \"server\""} +py-partiql-parser = {version = "0.5.1", optional = true, markers = "extra == \"server\""} pyparsing = {version = ">=3.0.7", optional = true, markers = "extra == \"server\""} python-dateutil = ">=2.1,<3.0.0" -python-jose = {version = ">=3.1.0,<4.0.0", extras = ["cryptography"], optional = true, markers = "extra == \"server\""} PyYAML = {version = ">=5.1", optional = true, markers = "extra == \"server\""} requests = ">=2.5" -responses = ">=0.13.0" +responses = ">=0.15.0" setuptools = {version = "*", optional = true, markers = "extra == \"server\""} -sshpubkeys = {version = ">=3.1.0", optional = true, markers = "extra == \"server\""} werkzeug = ">=0.5,<2.2.0 || >2.2.0,<2.2.1 || >2.2.1" xmltodict = "*" [package.extras] -all = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "ecdsa (!=0.15)", "graphql-core", "jsondiff (>=1.1.2)", "multipart", "openapi-spec-validator (>=0.5.0)", 
"py-partiql-parser (==0.5.0)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] -apigateway = ["PyYAML (>=5.1)", "ecdsa (!=0.15)", "openapi-spec-validator (>=0.5.0)", "python-jose[cryptography] (>=3.1.0,<4.0.0)"] -apigatewayv2 = ["PyYAML (>=5.1)"] +all = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.1)", "pyparsing (>=3.0.7)", "setuptools"] +apigateway = ["PyYAML (>=5.1)", "joserfc (>=0.9.0)", "openapi-spec-validator (>=0.5.0)"] +apigatewayv2 = ["PyYAML (>=5.1)", "openapi-spec-validator (>=0.5.0)"] appsync = ["graphql-core"] awslambda = ["docker (>=3.0.0)"] batch = ["docker (>=3.0.0)"] -cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "ecdsa (!=0.15)", "graphql-core", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.0)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] -cognitoidp = ["ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)"] -ds = ["sshpubkeys (>=3.1.0)"] -dynamodb = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.0)"] -dynamodbstreams = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.0)"] -ebs = ["sshpubkeys (>=3.1.0)"] -ec2 = ["sshpubkeys (>=3.1.0)"] -efs = ["sshpubkeys (>=3.1.0)"] -eks = ["sshpubkeys (>=3.1.0)"] +cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.1)", "pyparsing (>=3.0.7)", "setuptools"] +cognitoidp = ["joserfc (>=0.9.0)"] +dynamodb = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.1)"] +dynamodbstreams = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.1)"] glue = ["pyparsing (>=3.0.7)"] iotdata = ["jsondiff (>=1.1.2)"] -proxy = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "ecdsa (!=0.15)", "graphql-core", "jsondiff (>=1.1.2)", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.0)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] -resourcegroupstaggingapi = ["PyYAML (>=5.1)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "ecdsa (!=0.15)", "graphql-core", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.0)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "sshpubkeys (>=3.1.0)"] -route53resolver = ["sshpubkeys (>=3.1.0)"] -s3 = ["PyYAML (>=5.1)", "py-partiql-parser (==0.5.0)"] -s3crc32c = ["PyYAML (>=5.1)", "crc32c", "py-partiql-parser (==0.5.0)"] -server = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "ecdsa (!=0.15)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.0)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] +proxy = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.1)", "pyparsing (>=3.0.7)", "setuptools"] 
+resourcegroupstaggingapi = ["PyYAML (>=5.1)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.1)", "pyparsing (>=3.0.7)"] +s3 = ["PyYAML (>=5.1)", "py-partiql-parser (==0.5.1)"] +s3crc32c = ["PyYAML (>=5.1)", "crc32c", "py-partiql-parser (==0.5.1)"] +server = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.1)", "pyparsing (>=3.0.7)", "setuptools"] ssm = ["PyYAML (>=5.1)"] xray = ["aws-xray-sdk (>=0.93,!=0.96)", "setuptools"] @@ -2230,13 +2265,13 @@ files = [ [[package]] name = "mypy-boto3-glue" -version = "1.34.7" -description = "Type annotations for boto3.Glue 1.34.7 service generated with mypy-boto3-builder 7.23.0" +version = "1.34.35" +description = "Type annotations for boto3.Glue 1.34.35 service generated with mypy-boto3-builder 7.23.1" optional = true python-versions = ">=3.8" files = [ - {file = "mypy-boto3-glue-1.34.7.tar.gz", hash = "sha256:057a082527e3884ec72b499573094af9dbe0bd3639b26772ec883a17d2c22d12"}, - {file = "mypy_boto3_glue-1.34.7-py3-none-any.whl", hash = "sha256:59e3cced5c3367f0eb2136b62efeb7914f2779a17e574e3c965d5567c7c63de1"}, + {file = "mypy-boto3-glue-1.34.35.tar.gz", hash = "sha256:f8abe4f2e07d299a7b9b0dec30033a9877414e790ee49f274950a5b6d9dd3036"}, + {file = "mypy_boto3_glue-1.34.35-py3-none-any.whl", hash = "sha256:361b21c108ffae7868a25256aaeeca6f28fd1072cbeebebb9c4c4dbc3cbdc721"}, ] [package.dependencies] @@ -2311,51 +2346,6 @@ files = [ {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, ] -[[package]] -name = "numpy" -version = "1.26.3" -description = "Fundamental package for array computing in Python" -optional = true -python-versions = ">=3.9" -files = [ - {file = "numpy-1.26.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:806dd64230dbbfaca8a27faa64e2f414bf1c6622ab78cc4264f7f5f028fee3bf"}, - {file = "numpy-1.26.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02f98011ba4ab17f46f80f7f8f1c291ee7d855fcef0a5a98db80767a468c85cd"}, - {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d45b3ec2faed4baca41c76617fcdcfa4f684ff7a151ce6fc78ad3b6e85af0a6"}, - {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdd2b45bf079d9ad90377048e2747a0c82351989a2165821f0c96831b4a2a54b"}, - {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:211ddd1e94817ed2d175b60b6374120244a4dd2287f4ece45d49228b4d529178"}, - {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1240f767f69d7c4c8a29adde2310b871153df9b26b5cb2b54a561ac85146485"}, - {file = "numpy-1.26.3-cp310-cp310-win32.whl", hash = "sha256:21a9484e75ad018974a2fdaa216524d64ed4212e418e0a551a2d83403b0531d3"}, - {file = "numpy-1.26.3-cp310-cp310-win_amd64.whl", hash = "sha256:9e1591f6ae98bcfac2a4bbf9221c0b92ab49762228f38287f6eeb5f3f55905ce"}, - {file = "numpy-1.26.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b831295e5472954104ecb46cd98c08b98b49c69fdb7040483aff799a755a7374"}, - {file = "numpy-1.26.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9e87562b91f68dd8b1c39149d0323b42e0082db7ddb8e934ab4c292094d575d6"}, - {file = 
"numpy-1.26.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c66d6fec467e8c0f975818c1796d25c53521124b7cfb760114be0abad53a0a2"}, - {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f25e2811a9c932e43943a2615e65fc487a0b6b49218899e62e426e7f0a57eeda"}, - {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:af36e0aa45e25c9f57bf684b1175e59ea05d9a7d3e8e87b7ae1a1da246f2767e"}, - {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:51c7f1b344f302067b02e0f5b5d2daa9ed4a721cf49f070280ac202738ea7f00"}, - {file = "numpy-1.26.3-cp311-cp311-win32.whl", hash = "sha256:7ca4f24341df071877849eb2034948459ce3a07915c2734f1abb4018d9c49d7b"}, - {file = "numpy-1.26.3-cp311-cp311-win_amd64.whl", hash = "sha256:39763aee6dfdd4878032361b30b2b12593fb445ddb66bbac802e2113eb8a6ac4"}, - {file = "numpy-1.26.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a7081fd19a6d573e1a05e600c82a1c421011db7935ed0d5c483e9dd96b99cf13"}, - {file = "numpy-1.26.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12c70ac274b32bc00c7f61b515126c9205323703abb99cd41836e8125ea0043e"}, - {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f784e13e598e9594750b2ef6729bcd5a47f6cfe4a12cca13def35e06d8163e3"}, - {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f24750ef94d56ce6e33e4019a8a4d68cfdb1ef661a52cdaee628a56d2437419"}, - {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:77810ef29e0fb1d289d225cabb9ee6cf4d11978a00bb99f7f8ec2132a84e0166"}, - {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8ed07a90f5450d99dad60d3799f9c03c6566709bd53b497eb9ccad9a55867f36"}, - {file = "numpy-1.26.3-cp312-cp312-win32.whl", hash = "sha256:f73497e8c38295aaa4741bdfa4fda1a5aedda5473074369eca10626835445511"}, - {file = "numpy-1.26.3-cp312-cp312-win_amd64.whl", hash = "sha256:da4b0c6c699a0ad73c810736303f7fbae483bcb012e38d7eb06a5e3b432c981b"}, - {file = "numpy-1.26.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1666f634cb3c80ccbd77ec97bc17337718f56d6658acf5d3b906ca03e90ce87f"}, - {file = "numpy-1.26.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18c3319a7d39b2c6a9e3bb75aab2304ab79a811ac0168a671a62e6346c29b03f"}, - {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b7e807d6888da0db6e7e75838444d62495e2b588b99e90dd80c3459594e857b"}, - {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4d362e17bcb0011738c2d83e0a65ea8ce627057b2fdda37678f4374a382a137"}, - {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b8c275f0ae90069496068c714387b4a0eba5d531aace269559ff2b43655edd58"}, - {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cc0743f0302b94f397a4a65a660d4cd24267439eb16493fb3caad2e4389bccbb"}, - {file = "numpy-1.26.3-cp39-cp39-win32.whl", hash = "sha256:9bc6d1a7f8cedd519c4b7b1156d98e051b726bf160715b769106661d567b3f03"}, - {file = "numpy-1.26.3-cp39-cp39-win_amd64.whl", hash = "sha256:867e3644e208c8922a3be26fc6bbf112a035f50f0a86497f98f228c50c607bb2"}, - {file = "numpy-1.26.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3c67423b3703f8fbd90f5adaa37f85b5794d3366948efe9a5190a5f3a83fc34e"}, - {file = "numpy-1.26.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:46f47ee566d98849323f01b349d58f2557f02167ee301e5e28809a8c0e27a2d0"}, - {file = "numpy-1.26.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a8474703bffc65ca15853d5fd4d06b18138ae90c17c8d12169968e998e448bb5"}, - {file = "numpy-1.26.3.tar.gz", hash = "sha256:697df43e2b6310ecc9d95f05d5ef20eacc09c7c4ecc9da3f235d39e71b7da1e4"}, -] - [[package]] name = "oauthlib" version = "3.2.2" @@ -2606,6 +2596,34 @@ files = [ {file = "protobuf-4.25.2.tar.gz", hash = "sha256:fe599e175cb347efc8ee524bcd4b902d11f7262c0e569ececcb89995c15f0a5e"}, ] +[[package]] +name = "psutil" +version = "5.9.8" +description = "Cross-platform lib for process and system monitoring in Python." +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"}, + {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:05806de88103b25903dff19bb6692bd2e714ccf9e668d050d144012055cbca73"}, + {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:611052c4bc70432ec770d5d54f64206aa7203a101ec273a0cd82418c86503bb7"}, + {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:50187900d73c1381ba1454cf40308c2bf6f34268518b3f36a9b663ca87e65e36"}, + {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:02615ed8c5ea222323408ceba16c60e99c3f91639b07da6373fb7e6539abc56d"}, + {file = "psutil-5.9.8-cp27-none-win32.whl", hash = "sha256:36f435891adb138ed3c9e58c6af3e2e6ca9ac2f365efe1f9cfef2794e6c93b4e"}, + {file = "psutil-5.9.8-cp27-none-win_amd64.whl", hash = "sha256:bd1184ceb3f87651a67b2708d4c3338e9b10c5df903f2e3776b62303b26cb631"}, + {file = "psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81"}, + {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421"}, + {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4"}, + {file = "psutil-5.9.8-cp36-cp36m-win32.whl", hash = "sha256:7d79560ad97af658a0f6adfef8b834b53f64746d45b403f225b85c5c2c140eee"}, + {file = "psutil-5.9.8-cp36-cp36m-win_amd64.whl", hash = "sha256:27cc40c3493bb10de1be4b3f07cae4c010ce715290a5be22b98493509c6299e2"}, + {file = "psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0"}, + {file = "psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf"}, + {file = "psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8"}, + {file = "psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c"}, +] + +[package.extras] +test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] + [[package]] name = "psycopg2-binary" version = "2.9.9" @@ -2689,13 +2707,13 @@ files = [ [[package]] name = "py-partiql-parser" -version = "0.5.0" +version = "0.5.1" description = "Pure Python PartiQL Parser" optional = false python-versions = "*" files = [ - {file = "py-partiql-parser-0.5.0.tar.gz", hash = 
"sha256:427a662e87d51a0a50150fc8b75c9ebb4a52d49129684856c40c88b8c8e027e4"}, - {file = "py_partiql_parser-0.5.0-py3-none-any.whl", hash = "sha256:dc454c27526adf62deca5177ea997bf41fac4fd109c5d4c8d81f984de738ba8f"}, + {file = "py-partiql-parser-0.5.1.tar.gz", hash = "sha256:aeac8f46529d8651bbae88a1a6c14dc3aa38ebc4bc6bd1eb975044c0564246c6"}, + {file = "py_partiql_parser-0.5.1-py3-none-any.whl", hash = "sha256:53053e70987dea2983e1990ad85f87a7d8cec13dd4a4b065a740bcfd661f5a6b"}, ] [package.extras] @@ -2764,7 +2782,7 @@ numpy = ">=1.16.6,<2" name = "pyasn1" version = "0.5.1" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" -optional = false +optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ {file = "pyasn1-0.5.1-py2.py3-none-any.whl", hash = "sha256:4439847c58d40b1d0a573d07e3856e95333f1976294494c325775aeca506eb58"}, @@ -2798,18 +2816,18 @@ files = [ [[package]] name = "pydantic" -version = "2.5.3" +version = "2.6.1" description = "Data validation using Python type hints" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pydantic-2.5.3-py3-none-any.whl", hash = "sha256:d0caf5954bee831b6bfe7e338c32b9e30c85dfe080c843680783ac2b631673b4"}, - {file = "pydantic-2.5.3.tar.gz", hash = "sha256:b3ef57c62535b0941697cce638c08900d87fcb67e29cfa99e8a68f747f393f7a"}, + {file = "pydantic-2.6.1-py3-none-any.whl", hash = "sha256:0b6a909df3192245cb736509a92ff69e4fef76116feffec68e93a567347bae6f"}, + {file = "pydantic-2.6.1.tar.gz", hash = "sha256:4fd5c182a2488dc63e6d32737ff19937888001e2a6d86e94b3f233104a5d1fa9"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.14.6" +pydantic-core = "2.16.2" typing-extensions = ">=4.6.1" [package.extras] @@ -2817,116 +2835,90 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.14.6" +version = "2.16.2" description = "" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.14.6-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:72f9a942d739f09cd42fffe5dc759928217649f070056f03c70df14f5770acf9"}, - {file = "pydantic_core-2.14.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6a31d98c0d69776c2576dda4b77b8e0c69ad08e8b539c25c7d0ca0dc19a50d6c"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aa90562bc079c6c290f0512b21768967f9968e4cfea84ea4ff5af5d917016e4"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:370ffecb5316ed23b667d99ce4debe53ea664b99cc37bfa2af47bc769056d534"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f85f3843bdb1fe80e8c206fe6eed7a1caeae897e496542cee499c374a85c6e08"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9862bf828112e19685b76ca499b379338fd4c5c269d897e218b2ae8fcb80139d"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036137b5ad0cb0004c75b579445a1efccd072387a36c7f217bb8efd1afbe5245"}, - {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92879bce89f91f4b2416eba4429c7b5ca22c45ef4a499c39f0c5c69257522c7c"}, - {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0c08de15d50fa190d577e8591f0329a643eeaed696d7771760295998aca6bc66"}, - {file = 
"pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:36099c69f6b14fc2c49d7996cbf4f87ec4f0e66d1c74aa05228583225a07b590"}, - {file = "pydantic_core-2.14.6-cp310-none-win32.whl", hash = "sha256:7be719e4d2ae6c314f72844ba9d69e38dff342bc360379f7c8537c48e23034b7"}, - {file = "pydantic_core-2.14.6-cp310-none-win_amd64.whl", hash = "sha256:36fa402dcdc8ea7f1b0ddcf0df4254cc6b2e08f8cd80e7010d4c4ae6e86b2a87"}, - {file = "pydantic_core-2.14.6-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:dea7fcd62915fb150cdc373212141a30037e11b761fbced340e9db3379b892d4"}, - {file = "pydantic_core-2.14.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffff855100bc066ff2cd3aa4a60bc9534661816b110f0243e59503ec2df38421"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b027c86c66b8627eb90e57aee1f526df77dc6d8b354ec498be9a757d513b92b"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00b1087dabcee0b0ffd104f9f53d7d3eaddfaa314cdd6726143af6bc713aa27e"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75ec284328b60a4e91010c1acade0c30584f28a1f345bc8f72fe8b9e46ec6a96"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e1f4744eea1501404b20b0ac059ff7e3f96a97d3e3f48ce27a139e053bb370b"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2602177668f89b38b9f84b7b3435d0a72511ddef45dc14446811759b82235a1"}, - {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6c8edaea3089bf908dd27da8f5d9e395c5b4dc092dbcce9b65e7156099b4b937"}, - {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:478e9e7b360dfec451daafe286998d4a1eeaecf6d69c427b834ae771cad4b622"}, - {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b6ca36c12a5120bad343eef193cc0122928c5c7466121da7c20f41160ba00ba2"}, - {file = "pydantic_core-2.14.6-cp311-none-win32.whl", hash = "sha256:2b8719037e570639e6b665a4050add43134d80b687288ba3ade18b22bbb29dd2"}, - {file = "pydantic_core-2.14.6-cp311-none-win_amd64.whl", hash = "sha256:78ee52ecc088c61cce32b2d30a826f929e1708f7b9247dc3b921aec367dc1b23"}, - {file = "pydantic_core-2.14.6-cp311-none-win_arm64.whl", hash = "sha256:a19b794f8fe6569472ff77602437ec4430f9b2b9ec7a1105cfd2232f9ba355e6"}, - {file = "pydantic_core-2.14.6-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:667aa2eac9cd0700af1ddb38b7b1ef246d8cf94c85637cbb03d7757ca4c3fdec"}, - {file = "pydantic_core-2.14.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cdee837710ef6b56ebd20245b83799fce40b265b3b406e51e8ccc5b85b9099b7"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c5bcf3414367e29f83fd66f7de64509a8fd2368b1edf4351e862910727d3e51"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a92ae76f75d1915806b77cf459811e772d8f71fd1e4339c99750f0e7f6324f"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a983cca5ed1dd9a35e9e42ebf9f278d344603bfcb174ff99a5815f953925140a"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb92f9061657287eded380d7dc455bbf115430b3aa4741bdc662d02977e7d0af"}, - {file = 
"pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ace1e220b078c8e48e82c081e35002038657e4b37d403ce940fa679e57113b"}, - {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef633add81832f4b56d3b4c9408b43d530dfca29e68fb1b797dcb861a2c734cd"}, - {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e90d6cc4aad2cc1f5e16ed56e46cebf4877c62403a311af20459c15da76fd91"}, - {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e8a5ac97ea521d7bde7621d86c30e86b798cdecd985723c4ed737a2aa9e77d0c"}, - {file = "pydantic_core-2.14.6-cp312-none-win32.whl", hash = "sha256:f27207e8ca3e5e021e2402ba942e5b4c629718e665c81b8b306f3c8b1ddbb786"}, - {file = "pydantic_core-2.14.6-cp312-none-win_amd64.whl", hash = "sha256:b3e5fe4538001bb82e2295b8d2a39356a84694c97cb73a566dc36328b9f83b40"}, - {file = "pydantic_core-2.14.6-cp312-none-win_arm64.whl", hash = "sha256:64634ccf9d671c6be242a664a33c4acf12882670b09b3f163cd00a24cffbd74e"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:24368e31be2c88bd69340fbfe741b405302993242ccb476c5c3ff48aeee1afe0"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:e33b0834f1cf779aa839975f9d8755a7c2420510c0fa1e9fa0497de77cd35d2c"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6af4b3f52cc65f8a0bc8b1cd9676f8c21ef3e9132f21fed250f6958bd7223bed"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d15687d7d7f40333bd8266f3814c591c2e2cd263fa2116e314f60d82086e353a"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:095b707bb287bfd534044166ab767bec70a9bba3175dcdc3371782175c14e43c"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94fc0e6621e07d1e91c44e016cc0b189b48db053061cc22d6298a611de8071bb"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce830e480f6774608dedfd4a90c42aac4a7af0a711f1b52f807130c2e434c06"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a306cdd2ad3a7d795d8e617a58c3a2ed0f76c8496fb7621b6cd514eb1532cae8"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2f5fa187bde8524b1e37ba894db13aadd64faa884657473b03a019f625cee9a8"}, - {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:438027a975cc213a47c5d70672e0d29776082155cfae540c4e225716586be75e"}, - {file = "pydantic_core-2.14.6-cp37-none-win32.whl", hash = "sha256:f96ae96a060a8072ceff4cfde89d261837b4294a4f28b84a28765470d502ccc6"}, - {file = "pydantic_core-2.14.6-cp37-none-win_amd64.whl", hash = "sha256:e646c0e282e960345314f42f2cea5e0b5f56938c093541ea6dbf11aec2862391"}, - {file = "pydantic_core-2.14.6-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:db453f2da3f59a348f514cfbfeb042393b68720787bbef2b4c6068ea362c8149"}, - {file = "pydantic_core-2.14.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3860c62057acd95cc84044e758e47b18dcd8871a328ebc8ccdefd18b0d26a21b"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36026d8f99c58d7044413e1b819a67ca0e0b8ebe0f25e775e6c3d1fabb3c38fb"}, - {file = 
"pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ed1af8692bd8d2a29d702f1a2e6065416d76897d726e45a1775b1444f5928a7"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:314ccc4264ce7d854941231cf71b592e30d8d368a71e50197c905874feacc8a8"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:982487f8931067a32e72d40ab6b47b1628a9c5d344be7f1a4e668fb462d2da42"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dbe357bc4ddda078f79d2a36fc1dd0494a7f2fad83a0a684465b6f24b46fe80"}, - {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2f6ffc6701a0eb28648c845f4945a194dc7ab3c651f535b81793251e1185ac3d"}, - {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7f5025db12fc6de7bc1104d826d5aee1d172f9ba6ca936bf6474c2148ac336c1"}, - {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dab03ed811ed1c71d700ed08bde8431cf429bbe59e423394f0f4055f1ca0ea60"}, - {file = "pydantic_core-2.14.6-cp38-none-win32.whl", hash = "sha256:dfcbebdb3c4b6f739a91769aea5ed615023f3c88cb70df812849aef634c25fbe"}, - {file = "pydantic_core-2.14.6-cp38-none-win_amd64.whl", hash = "sha256:99b14dbea2fdb563d8b5a57c9badfcd72083f6006caf8e126b491519c7d64ca8"}, - {file = "pydantic_core-2.14.6-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:4ce8299b481bcb68e5c82002b96e411796b844d72b3e92a3fbedfe8e19813eab"}, - {file = "pydantic_core-2.14.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b9a9d92f10772d2a181b5ca339dee066ab7d1c9a34ae2421b2a52556e719756f"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd9e98b408384989ea4ab60206b8e100d8687da18b5c813c11e92fd8212a98e0"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4f86f1f318e56f5cbb282fe61eb84767aee743ebe32c7c0834690ebea50c0a6b"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86ce5fcfc3accf3a07a729779d0b86c5d0309a4764c897d86c11089be61da160"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dcf1978be02153c6a31692d4fbcc2a3f1db9da36039ead23173bc256ee3b91b"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eedf97be7bc3dbc8addcef4142f4b4164066df0c6f36397ae4aaed3eb187d8ab"}, - {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5f916acf8afbcab6bacbb376ba7dc61f845367901ecd5e328fc4d4aef2fcab0"}, - {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8a14c192c1d724c3acbfb3f10a958c55a2638391319ce8078cb36c02283959b9"}, - {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0348b1dc6b76041516e8a854ff95b21c55f5a411c3297d2ca52f5528e49d8411"}, - {file = "pydantic_core-2.14.6-cp39-none-win32.whl", hash = "sha256:de2a0645a923ba57c5527497daf8ec5df69c6eadf869e9cd46e86349146e5975"}, - {file = "pydantic_core-2.14.6-cp39-none-win_amd64.whl", hash = "sha256:aca48506a9c20f68ee61c87f2008f81f8ee99f8d7f0104bff3c47e2d148f89d9"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d5c28525c19f5bb1e09511669bb57353d22b94cf8b65f3a8d141c389a55dec95"}, - {file = 
"pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:78d0768ee59baa3de0f4adac9e3748b4b1fffc52143caebddfd5ea2961595277"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b93785eadaef932e4fe9c6e12ba67beb1b3f1e5495631419c784ab87e975670"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a874f21f87c485310944b2b2734cd6d318765bcbb7515eead33af9641816506e"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89f4477d915ea43b4ceea6756f63f0288941b6443a2b28c69004fe07fde0d0d"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:172de779e2a153d36ee690dbc49c6db568d7b33b18dc56b69a7514aecbcf380d"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dfcebb950aa7e667ec226a442722134539e77c575f6cfaa423f24371bb8d2e94"}, - {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:55a23dcd98c858c0db44fc5c04fc7ed81c4b4d33c653a7c45ddaebf6563a2f66"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4241204e4b36ab5ae466ecec5c4c16527a054c69f99bba20f6f75232a6a534e2"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e574de99d735b3fc8364cba9912c2bec2da78775eba95cbb225ef7dda6acea24"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1302a54f87b5cd8528e4d6d1bf2133b6aa7c6122ff8e9dc5220fbc1e07bffebd"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8e81e4b55930e5ffab4a68db1af431629cf2e4066dbdbfef65348b8ab804ea8"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c99462ffc538717b3e60151dfaf91125f637e801f5ab008f81c402f1dff0cd0f"}, - {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e4cf2d5829f6963a5483ec01578ee76d329eb5caf330ecd05b3edd697e7d768a"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:cf10b7d58ae4a1f07fccbf4a0a956d705356fea05fb4c70608bb6fa81d103cda"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:399ac0891c284fa8eb998bcfa323f2234858f5d2efca3950ae58c8f88830f145"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c6a5c79b28003543db3ba67d1df336f253a87d3112dac3a51b94f7d48e4c0e1"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599c87d79cab2a6a2a9df4aefe0455e61e7d2aeede2f8577c1b7c0aec643ee8e"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43e166ad47ba900f2542a80d83f9fc65fe99eb63ceec4debec160ae729824052"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a0b5db001b98e1c649dd55afa928e75aa4087e587b9524a4992316fa23c9fba"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:747265448cb57a9f37572a488a57d873fd96bf51e5bb7edb52cfb37124516da4"}, - {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7ebe3416785f65c28f4f9441e916bfc8a54179c8dea73c23023f7086fa601c5d"}, - {file = 
"pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:86c963186ca5e50d5c8287b1d1c9d3f8f024cbe343d048c5bd282aec2d8641f2"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e0641b506486f0b4cd1500a2a65740243e8670a2549bb02bc4556a83af84ae03"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71d72ca5eaaa8d38c8df16b7deb1a2da4f650c41b58bb142f3fb75d5ad4a611f"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27e524624eace5c59af499cd97dc18bb201dc6a7a2da24bfc66ef151c69a5f2a"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3dde6cac75e0b0902778978d3b1646ca9f438654395a362cb21d9ad34b24acf"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:00646784f6cd993b1e1c0e7b0fdcbccc375d539db95555477771c27555e3c556"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:23598acb8ccaa3d1d875ef3b35cb6376535095e9405d91a3d57a8c7db5d29341"}, - {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7f41533d7e3cf9520065f610b41ac1c76bc2161415955fbcead4981b22c7611e"}, - {file = "pydantic_core-2.14.6.tar.gz", hash = "sha256:1fd0c1d395372843fba13a51c28e3bb9d59bd7aebfeb17358ffaaa1e4dbbe948"}, + {file = "pydantic_core-2.16.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3fab4e75b8c525a4776e7630b9ee48aea50107fea6ca9f593c98da3f4d11bf7c"}, + {file = "pydantic_core-2.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8bde5b48c65b8e807409e6f20baee5d2cd880e0fad00b1a811ebc43e39a00ab2"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2924b89b16420712e9bb8192396026a8fbd6d8726224f918353ac19c4c043d2a"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16aa02e7a0f539098e215fc193c8926c897175d64c7926d00a36188917717a05"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:936a787f83db1f2115ee829dd615c4f684ee48ac4de5779ab4300994d8af325b"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:459d6be6134ce3b38e0ef76f8a672924460c455d45f1ad8fdade36796df1ddc8"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9ee4febb249c591d07b2d4dd36ebcad0ccd128962aaa1801508320896575ef"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40a0bd0bed96dae5712dab2aba7d334a6c67cbcac2ddfca7dbcc4a8176445990"}, + {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:870dbfa94de9b8866b37b867a2cb37a60c401d9deb4a9ea392abf11a1f98037b"}, + {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:308974fdf98046db28440eb3377abba274808bf66262e042c412eb2adf852731"}, + {file = "pydantic_core-2.16.2-cp310-none-win32.whl", hash = "sha256:a477932664d9611d7a0816cc3c0eb1f8856f8a42435488280dfbf4395e141485"}, + {file = "pydantic_core-2.16.2-cp310-none-win_amd64.whl", hash = "sha256:8f9142a6ed83d90c94a3efd7af8873bf7cefed2d3d44387bf848888482e2d25f"}, + {file = "pydantic_core-2.16.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:406fac1d09edc613020ce9cf3f2ccf1a1b2f57ab00552b4c18e3d5276c67eb11"}, + {file = 
"pydantic_core-2.16.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce232a6170dd6532096cadbf6185271e4e8c70fc9217ebe105923ac105da9978"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a90fec23b4b05a09ad988e7a4f4e081711a90eb2a55b9c984d8b74597599180f"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8aafeedb6597a163a9c9727d8a8bd363a93277701b7bfd2749fbefee2396469e"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9957433c3a1b67bdd4c63717eaf174ebb749510d5ea612cd4e83f2d9142f3fc8"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0d7a9165167269758145756db43a133608a531b1e5bb6a626b9ee24bc38a8f7"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dffaf740fe2e147fedcb6b561353a16243e654f7fe8e701b1b9db148242e1272"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8ed79883b4328b7f0bd142733d99c8e6b22703e908ec63d930b06be3a0e7113"}, + {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cf903310a34e14651c9de056fcc12ce090560864d5a2bb0174b971685684e1d8"}, + {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:46b0d5520dbcafea9a8645a8164658777686c5c524d381d983317d29687cce97"}, + {file = "pydantic_core-2.16.2-cp311-none-win32.whl", hash = "sha256:70651ff6e663428cea902dac297066d5c6e5423fda345a4ca62430575364d62b"}, + {file = "pydantic_core-2.16.2-cp311-none-win_amd64.whl", hash = "sha256:98dc6f4f2095fc7ad277782a7c2c88296badcad92316b5a6e530930b1d475ebc"}, + {file = "pydantic_core-2.16.2-cp311-none-win_arm64.whl", hash = "sha256:ef6113cd31411eaf9b39fc5a8848e71c72656fd418882488598758b2c8c6dfa0"}, + {file = "pydantic_core-2.16.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:88646cae28eb1dd5cd1e09605680c2b043b64d7481cdad7f5003ebef401a3039"}, + {file = "pydantic_core-2.16.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7b883af50eaa6bb3299780651e5be921e88050ccf00e3e583b1e92020333304b"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bf26c2e2ea59d32807081ad51968133af3025c4ba5753e6a794683d2c91bf6e"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99af961d72ac731aae2a1b55ccbdae0733d816f8bfb97b41909e143de735f522"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02906e7306cb8c5901a1feb61f9ab5e5c690dbbeaa04d84c1b9ae2a01ebe9379"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5362d099c244a2d2f9659fb3c9db7c735f0004765bbe06b99be69fbd87c3f15"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ac426704840877a285d03a445e162eb258924f014e2f074e209d9b4ff7bf380"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b94cbda27267423411c928208e89adddf2ea5dd5f74b9528513f0358bba019cb"}, + {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6db58c22ac6c81aeac33912fb1af0e930bc9774166cdd56eade913d5f2fff35e"}, + {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:396fdf88b1b503c9c59c84a08b6833ec0c3b5ad1a83230252a9e17b7dfb4cffc"}, + {file = "pydantic_core-2.16.2-cp312-none-win32.whl", hash = "sha256:7c31669e0c8cc68400ef0c730c3a1e11317ba76b892deeefaf52dcb41d56ed5d"}, + {file = "pydantic_core-2.16.2-cp312-none-win_amd64.whl", hash = "sha256:a3b7352b48fbc8b446b75f3069124e87f599d25afb8baa96a550256c031bb890"}, + {file = "pydantic_core-2.16.2-cp312-none-win_arm64.whl", hash = "sha256:a9e523474998fb33f7c1a4d55f5504c908d57add624599e095c20fa575b8d943"}, + {file = "pydantic_core-2.16.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:ae34418b6b389d601b31153b84dce480351a352e0bb763684a1b993d6be30f17"}, + {file = "pydantic_core-2.16.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:732bd062c9e5d9582a30e8751461c1917dd1ccbdd6cafb032f02c86b20d2e7ec"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b52776a2e3230f4854907a1e0946eec04d41b1fc64069ee774876bbe0eab55"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ef551c053692b1e39e3f7950ce2296536728871110e7d75c4e7753fb30ca87f4"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ebb892ed8599b23fa8f1799e13a12c87a97a6c9d0f497525ce9858564c4575a4"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa6c8c582036275997a733427b88031a32ffa5dfc3124dc25a730658c47a572f"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ba0884a91f1aecce75202473ab138724aa4fb26d7707f2e1fa6c3e68c84fbf"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7924e54f7ce5d253d6160090ddc6df25ed2feea25bfb3339b424a9dd591688bc"}, + {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69a7b96b59322a81c2203be537957313b07dd333105b73db0b69212c7d867b4b"}, + {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7e6231aa5bdacda78e96ad7b07d0c312f34ba35d717115f4b4bff6cb87224f0f"}, + {file = "pydantic_core-2.16.2-cp38-none-win32.whl", hash = "sha256:41dac3b9fce187a25c6253ec79a3f9e2a7e761eb08690e90415069ea4a68ff7a"}, + {file = "pydantic_core-2.16.2-cp38-none-win_amd64.whl", hash = "sha256:f685dbc1fdadb1dcd5b5e51e0a378d4685a891b2ddaf8e2bba89bd3a7144e44a"}, + {file = "pydantic_core-2.16.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:55749f745ebf154c0d63d46c8c58594d8894b161928aa41adbb0709c1fe78b77"}, + {file = "pydantic_core-2.16.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b30b0dd58a4509c3bd7eefddf6338565c4905406aee0c6e4a5293841411a1286"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18de31781cdc7e7b28678df7c2d7882f9692ad060bc6ee3c94eb15a5d733f8f7"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5864b0242f74b9dd0b78fd39db1768bc3f00d1ffc14e596fd3e3f2ce43436a33"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8f9186ca45aee030dc8234118b9c0784ad91a0bb27fc4e7d9d6608a5e3d386c"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc6f6c9be0ab6da37bc77c2dda5f14b1d532d5dbef00311ee6e13357a418e646"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:aa057095f621dad24a1e906747179a69780ef45cc8f69e97463692adbcdae878"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ad84731a26bcfb299f9eab56c7932d46f9cad51c52768cace09e92a19e4cf55"}, + {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3b052c753c4babf2d1edc034c97851f867c87d6f3ea63a12e2700f159f5c41c3"}, + {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e0f686549e32ccdb02ae6f25eee40cc33900910085de6aa3790effd391ae10c2"}, + {file = "pydantic_core-2.16.2-cp39-none-win32.whl", hash = "sha256:7afb844041e707ac9ad9acad2188a90bffce2c770e6dc2318be0c9916aef1469"}, + {file = "pydantic_core-2.16.2-cp39-none-win_amd64.whl", hash = "sha256:9da90d393a8227d717c19f5397688a38635afec89f2e2d7af0df037f3249c39a"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f60f920691a620b03082692c378661947d09415743e437a7478c309eb0e4f82"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:47924039e785a04d4a4fa49455e51b4eb3422d6eaacfde9fc9abf8fdef164e8a"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6294e76b0380bb7a61eb8a39273c40b20beb35e8c87ee101062834ced19c545"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe56851c3f1d6f5384b3051c536cc81b3a93a73faf931f404fef95217cf1e10d"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9d776d30cde7e541b8180103c3f294ef7c1862fd45d81738d156d00551005784"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:72f7919af5de5ecfaf1eba47bf9a5d8aa089a3340277276e5636d16ee97614d7"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:4bfcbde6e06c56b30668a0c872d75a7ef3025dc3c1823a13cf29a0e9b33f67e8"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ff7c97eb7a29aba230389a2661edf2e9e06ce616c7e35aa764879b6894a44b25"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9b5f13857da99325dcabe1cc4e9e6a3d7b2e2c726248ba5dd4be3e8e4a0b6d0e"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a7e41e3ada4cca5f22b478c08e973c930e5e6c7ba3588fb8e35f2398cdcc1545"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60eb8ceaa40a41540b9acae6ae7c1f0a67d233c40dc4359c256ad2ad85bdf5e5"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7beec26729d496a12fd23cf8da9944ee338c8b8a17035a560b585c36fe81af20"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:22c5f022799f3cd6741e24f0443ead92ef42be93ffda0d29b2597208c94c3753"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:eca58e319f4fd6df004762419612122b2c7e7d95ffafc37e890252f869f3fb2a"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ed957db4c33bc99895f3a1672eca7e80e8cda8bd1e29a80536b4ec2153fa9804"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:459c0d338cc55d099798618f714b21b7ece17eb1a87879f2da20a3ff4c7628e2"}, + {file = "pydantic_core-2.16.2.tar.gz", hash = 
"sha256:0ba503850d8b8dcc18391f10de896ae51d37fe5fe43dbfb6a35c5c5cad271a06"}, ] [package.dependencies] @@ -3102,28 +3094,6 @@ files = [ [package.dependencies] six = ">=1.5" -[[package]] -name = "python-jose" -version = "3.3.0" -description = "JOSE implementation in Python" -optional = false -python-versions = "*" -files = [ - {file = "python-jose-3.3.0.tar.gz", hash = "sha256:55779b5e6ad599c6336191246e95eb2293a9ddebd555f796a65f838f07e5d78a"}, - {file = "python_jose-3.3.0-py2.py3-none-any.whl", hash = "sha256:9b1376b023f8b298536eedd47ae1089bcdb848f1535ab30555cd92002d78923a"}, -] - -[package.dependencies] -cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"cryptography\""} -ecdsa = "!=0.15" -pyasn1 = "*" -rsa = "*" - -[package.extras] -cryptography = ["cryptography (>=3.4.0)"] -pycrypto = ["pyasn1", "pycrypto (>=2.6.0,<2.7.0)"] -pycryptodome = ["pyasn1", "pycryptodome (>=3.3.1,<4.0.0)"] - [[package]] name = "python-snappy" version = "0.6.1" @@ -3277,35 +3247,31 @@ files = [ [[package]] name = "ray" -version = "2.7.2" +version = "2.9.2" description = "Ray provides a simple, universal API for building distributed applications." optional = true -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "ray-2.7.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:4c415fc90df8a29cf0d594774a456caecf05103d772d487abb6fdda3397ee68e"}, - {file = "ray-2.7.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0434fe4eb416e5e4f39dbafdd1ead3407f72589cae19d1739b90a35d951e17bc"}, - {file = "ray-2.7.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:b8966bc9e8d005600bdfdb1bfe19241ffbf76b3c111d3931cad2200bc2190d58"}, - {file = "ray-2.7.2-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:162f7e637067a6408f3041755b8df649624fb7200dc7566b142cc5877580b6e3"}, - {file = "ray-2.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:9411fc45e94608c78b0bf77e3ed1dfddbafa0c16e12bcec81cbce26c7ab0791c"}, - {file = "ray-2.7.2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:b0db75f9c9116c80d6846498b099f2738ad8b9081a94c625365c6dc95baf50bc"}, - {file = "ray-2.7.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:20c782af16c9ec5a6ef9c933cea0b2be1a81a90162a4f14415b8692e5a3e1ceb"}, - {file = "ray-2.7.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:9a94ee86699fcc42c50ff12b482ad817e71027ceae5c7c26af251931b32ee6d3"}, - {file = "ray-2.7.2-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:75cfbf61383b5e6940cced2033c579e59458fd07d20ba0cf6bcf75f62fb101a2"}, - {file = "ray-2.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:33f99896e8eb19bcd4b029d3b22d9c50e56acd9a3774ff2b074e8c6926124cff"}, - {file = "ray-2.7.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:b8fbf0be759f0a575ec1f504327bffb5d2340bca33fc37beafed91cb78bfe5f5"}, - {file = "ray-2.7.2-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:d5bd397b19b4faf8df82c46dd705b17cc976efefda48145b595c97dd117fa79f"}, - {file = "ray-2.7.2-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:ee681a5707d9e91850490f42ee007ed8c1c54ea38f5c831aada347f3cc1d9b03"}, - {file = "ray-2.7.2-cp37-cp37m-win_amd64.whl", hash = "sha256:155ec98c73c55086e855b2e1f7d6154301556c14bb85d69b17cab087bfcf5268"}, - {file = "ray-2.7.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:94817f7ae24b78213cbcafdeb44e44ae7132e617693725e2a1bbac0461a14382"}, - {file = "ray-2.7.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3217cbabb31bbfa87a594aaf27371e722a6d527621ef3b01b3f4603b2997cc0d"}, - {file = "ray-2.7.2-cp38-cp38-manylinux2014_aarch64.whl", hash = 
"sha256:19bc3eaabc2635d3a53f43db8198d1a56263a0697401fb7716f90ff60415f2be"}, - {file = "ray-2.7.2-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:158c424cad4480e93a90d9460a8dd3390bafab220220bb8678f54bc1afecda43"}, - {file = "ray-2.7.2-cp38-cp38-win_amd64.whl", hash = "sha256:f1a41c3511ea90fef47b95721e6589427955503a3fcde1615c6371eeb2c515ab"}, - {file = "ray-2.7.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:6820a6369536e070a72d8bf439cc7085b17f0c5102fb51860b330bf231382e7e"}, - {file = "ray-2.7.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e226f8739a4726c445fc41dbb41f7440fa202ad97099ee9d35cc8bc91f1d2c5f"}, - {file = "ray-2.7.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:29930d38bd563d57e3457f85dddd0347c79018d12ef42db35abc648c127dce81"}, - {file = "ray-2.7.2-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:90eb9415f4abc39ce054f2148dcb97fbb9ae725dd150ea7b030ec9d7cf6859f2"}, - {file = "ray-2.7.2-cp39-cp39-win_amd64.whl", hash = "sha256:8010e9f0ac04d901a96b3876def3899867f705148a7d820c08c02c7322d08217"}, + {file = "ray-2.9.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:f879522e7d9b809d3aa28fb627ab87344b31cf79e1829b9b67f0581305e2bb84"}, + {file = "ray-2.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ccbcf7f57bf10c52b3ebcec6e8d9114491ef12a20255e70ba0d5f12a81e9391c"}, + {file = "ray-2.9.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:a3059e1d4287db33811e7604b4ecc1aa79dc2d745b49e5ec3415060da61cb749"}, + {file = "ray-2.9.2-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:11a0fd3f75ca07f727b1e83c3b2c74e75dc1fe0aba99a416a865f83e7ff23620"}, + {file = "ray-2.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:1e5c4733314bab19d89b373836899e76b2ab839d45f59966b431c89076feaab7"}, + {file = "ray-2.9.2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1a734c1c586e666f5024b46405c3df52b634977c9565bca16a01d6fefc457578"}, + {file = "ray-2.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:47975fcb4b6e6cadd35da4b78f3e643f5ab6e99a688d79980ca2f0dca345d034"}, + {file = "ray-2.9.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:7d7ed76fa40abe81eefb158d505f046de0614d994fc8027f5b05889824503257"}, + {file = "ray-2.9.2-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:83fd7961d39da5ae68731be430891a21a13d0257bc25ab6adf13712297e309fa"}, + {file = "ray-2.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:5929ac8221ba3b6446cd0885a0ade50cfaaecacba771dfeed57ead8b5c6fdd14"}, + {file = "ray-2.9.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:61b5a742f1f249e92893433720423f729018d40ee26a015b6a12b443d0e2e3eb"}, + {file = "ray-2.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:51552b1142944e13ba1da0c44395a627701c563fbe3f6c490001e6e4fd0ee011"}, + {file = "ray-2.9.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8b715b0ad9aa027836ecb7dc33b3a2dfc91c5d9d22a0ddf72c0844df5d641fca"}, + {file = "ray-2.9.2-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:e1a35e2a3de4e3875bd1e76770fb89149adc773193a5e79488db4047ef14ffd0"}, + {file = "ray-2.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:8d97f674c675370550ec4347e7e8dee0f99e38dd8f220ff8acb8ca15c208d73a"}, + {file = "ray-2.9.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:efa2c60ab11f41e4d43a227cd6bf491f9f2f8ed820c482c7d8d86a2412b6fd05"}, + {file = "ray-2.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5ebd71ef2e4d76752a1ff048e9d4c22811c7e990e8d4e3b30974b3e4099411b6"}, + {file = "ray-2.9.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:d84064ab3aa2868991a98dc6a54cc2221abcaf9406eb95fa2ec0f66006585f92"}, + 
{file = "ray-2.9.2-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:bc95efd035dcdc2f2b549ce3e13c5abf2043f3136b8a5980d77f4f098a9a6796"}, + {file = "ray-2.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:aea2ad4dbad2d6bd21ba17f7a2fcf762f53d8bcbc30b9d6916245e447a971e48"}, ] [package.dependencies] @@ -3315,26 +3281,22 @@ filelock = "*" frozenlist = "*" jsonschema = "*" msgpack = ">=1.0.0,<2.0.0" -numpy = [ - {version = ">=1.16", markers = "python_version < \"3.9\""}, - {version = ">=1.19.3", markers = "python_version >= \"3.9\""}, -] packaging = "*" protobuf = ">=3.15.3,<3.19.5 || >3.19.5" pyyaml = "*" requests = "*" [package.extras] -air = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "numpy (>=1.20)", "opencensus", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "requests", "smart-open", "starlette", "tensorboardX (>=1.9)", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"] -all = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "dm-tree", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (!=1.56.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "gymnasium (==0.28.1)", "lz4", "numpy (>=1.20)", "opencensus", "opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "pyyaml", "ray-cpp (==2.7.2)", "requests", "rich", "scikit-image", "scipy", "smart-open", "starlette", "tensorboardX (>=1.9)", "typer", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"] +air = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi (<=0.108.0)", "fsspec", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "numpy (>=1.20)", "opencensus", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "requests", "smart-open", "starlette", "tensorboardX (>=1.9)", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] +all = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "dm-tree", "fastapi (<=0.108.0)", "fsspec", "gpustat (>=1.0.0)", "grpcio (!=1.56.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "gymnasium (==0.28.1)", "lz4", "numpy (>=1.20)", "opencensus", "opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "pyyaml", "ray-cpp (==2.9.2)", "requests", "rich", "scikit-image", "scipy", "smart-open", "starlette", "tensorboardX (>=1.9)", "typer", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] client = ["grpcio (!=1.56.0)"] -cpp = ["ray-cpp (==2.7.2)"] +cpp = ["ray-cpp (==2.9.2)"] data = ["fsspec", "numpy (>=1.20)", "pandas (>=1.3)", "pyarrow (>=6.0.1)"] -default = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "virtualenv (>=20.0.24,<20.21.1)"] +default = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "requests", "smart-open", "virtualenv 
(>=20.0.24,!=20.21.1)"] observability = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"] rllib = ["dm-tree", "fsspec", "gymnasium (==0.28.1)", "lz4", "pandas", "pyarrow (>=6.0.1)", "pyyaml", "requests", "rich", "scikit-image", "scipy", "tensorboardX (>=1.9)", "typer"] -serve = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"] -serve-grpc = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"] +serve = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi (<=0.108.0)", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "requests", "smart-open", "starlette", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] +serve-grpc = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi (<=0.108.0)", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "requests", "smart-open", "starlette", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] train = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"] tune = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"] @@ -3677,7 +3639,7 @@ files = [ name = "rsa" version = "4.9" description = "Pure-Python RSA implementation" -optional = false +optional = true python-versions = ">=3.6,<4" files = [ {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, @@ -3779,60 +3741,60 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.25" +version = "2.0.27" description = "Database Abstraction Library" optional = true python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4344d059265cc8b1b1be351bfb88749294b87a8b2bbe21dfbe066c4199541ebd"}, - {file = "SQLAlchemy-2.0.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f9e2e59cbcc6ba1488404aad43de005d05ca56e069477b33ff74e91b6319735"}, - {file = "SQLAlchemy-2.0.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84daa0a2055df9ca0f148a64fdde12ac635e30edbca80e87df9b3aaf419e144a"}, - {file = "SQLAlchemy-2.0.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc8b7dabe8e67c4832891a5d322cec6d44ef02f432b4588390017f5cec186a84"}, - {file = "SQLAlchemy-2.0.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f5693145220517b5f42393e07a6898acdfe820e136c98663b971906120549da5"}, - {file = "SQLAlchemy-2.0.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:db854730a25db7c956423bb9fb4bdd1216c839a689bf9cc15fada0a7fb2f4570"}, - {file = "SQLAlchemy-2.0.25-cp310-cp310-win32.whl", hash = "sha256:14a6f68e8fc96e5e8f5647ef6cda6250c780612a573d99e4d881581432ef1669"}, - {file = "SQLAlchemy-2.0.25-cp310-cp310-win_amd64.whl", hash = 
"sha256:87f6e732bccd7dcf1741c00f1ecf33797383128bd1c90144ac8adc02cbb98643"}, - {file = "SQLAlchemy-2.0.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:342d365988ba88ada8af320d43df4e0b13a694dbd75951f537b2d5e4cb5cd002"}, - {file = "SQLAlchemy-2.0.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f37c0caf14b9e9b9e8f6dbc81bc56db06acb4363eba5a633167781a48ef036ed"}, - {file = "SQLAlchemy-2.0.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa9373708763ef46782d10e950b49d0235bfe58facebd76917d3f5cbf5971aed"}, - {file = "SQLAlchemy-2.0.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d24f571990c05f6b36a396218f251f3e0dda916e0c687ef6fdca5072743208f5"}, - {file = "SQLAlchemy-2.0.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75432b5b14dc2fff43c50435e248b45c7cdadef73388e5610852b95280ffd0e9"}, - {file = "SQLAlchemy-2.0.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:884272dcd3ad97f47702965a0e902b540541890f468d24bd1d98bcfe41c3f018"}, - {file = "SQLAlchemy-2.0.25-cp311-cp311-win32.whl", hash = "sha256:e607cdd99cbf9bb80391f54446b86e16eea6ad309361942bf88318bcd452363c"}, - {file = "SQLAlchemy-2.0.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d505815ac340568fd03f719446a589162d55c52f08abd77ba8964fbb7eb5b5f"}, - {file = "SQLAlchemy-2.0.25-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0dacf67aee53b16f365c589ce72e766efaabd2b145f9de7c917777b575e3659d"}, - {file = "SQLAlchemy-2.0.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b801154027107461ee992ff4b5c09aa7cc6ec91ddfe50d02bca344918c3265c6"}, - {file = "SQLAlchemy-2.0.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59a21853f5daeb50412d459cfb13cb82c089ad4c04ec208cd14dddd99fc23b39"}, - {file = "SQLAlchemy-2.0.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29049e2c299b5ace92cbed0c1610a7a236f3baf4c6b66eb9547c01179f638ec5"}, - {file = "SQLAlchemy-2.0.25-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b64b183d610b424a160b0d4d880995e935208fc043d0302dd29fee32d1ee3f95"}, - {file = "SQLAlchemy-2.0.25-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f7a7d7fcc675d3d85fbf3b3828ecd5990b8d61bd6de3f1b260080b3beccf215"}, - {file = "SQLAlchemy-2.0.25-cp312-cp312-win32.whl", hash = "sha256:cf18ff7fc9941b8fc23437cc3e68ed4ebeff3599eec6ef5eebf305f3d2e9a7c2"}, - {file = "SQLAlchemy-2.0.25-cp312-cp312-win_amd64.whl", hash = "sha256:91f7d9d1c4dd1f4f6e092874c128c11165eafcf7c963128f79e28f8445de82d5"}, - {file = "SQLAlchemy-2.0.25-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:bb209a73b8307f8fe4fe46f6ad5979649be01607f11af1eb94aa9e8a3aaf77f0"}, - {file = "SQLAlchemy-2.0.25-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:798f717ae7c806d67145f6ae94dc7c342d3222d3b9a311a784f371a4333212c7"}, - {file = "SQLAlchemy-2.0.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fdd402169aa00df3142149940b3bf9ce7dde075928c1886d9a1df63d4b8de62"}, - {file = "SQLAlchemy-2.0.25-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0d3cab3076af2e4aa5693f89622bef7fa770c6fec967143e4da7508b3dceb9b9"}, - {file = "SQLAlchemy-2.0.25-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:74b080c897563f81062b74e44f5a72fa44c2b373741a9ade701d5f789a10ba23"}, - {file = "SQLAlchemy-2.0.25-cp37-cp37m-win32.whl", hash = "sha256:87d91043ea0dc65ee583026cb18e1b458d8ec5fc0a93637126b5fc0bc3ea68c4"}, - {file = "SQLAlchemy-2.0.25-cp37-cp37m-win_amd64.whl", hash = 
"sha256:75f99202324383d613ddd1f7455ac908dca9c2dd729ec8584c9541dd41822a2c"}, - {file = "SQLAlchemy-2.0.25-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:420362338681eec03f53467804541a854617faed7272fe71a1bfdb07336a381e"}, - {file = "SQLAlchemy-2.0.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c88f0c7dcc5f99bdb34b4fd9b69b93c89f893f454f40219fe923a3a2fd11625"}, - {file = "SQLAlchemy-2.0.25-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3be4987e3ee9d9a380b66393b77a4cd6d742480c951a1c56a23c335caca4ce3"}, - {file = "SQLAlchemy-2.0.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a159111a0f58fb034c93eeba211b4141137ec4b0a6e75789ab7a3ef3c7e7e3"}, - {file = "SQLAlchemy-2.0.25-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8b8cb63d3ea63b29074dcd29da4dc6a97ad1349151f2d2949495418fd6e48db9"}, - {file = "SQLAlchemy-2.0.25-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:736ea78cd06de6c21ecba7416499e7236a22374561493b456a1f7ffbe3f6cdb4"}, - {file = "SQLAlchemy-2.0.25-cp38-cp38-win32.whl", hash = "sha256:10331f129982a19df4284ceac6fe87353ca3ca6b4ca77ff7d697209ae0a5915e"}, - {file = "SQLAlchemy-2.0.25-cp38-cp38-win_amd64.whl", hash = "sha256:c55731c116806836a5d678a70c84cb13f2cedba920212ba7dcad53260997666d"}, - {file = "SQLAlchemy-2.0.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:605b6b059f4b57b277f75ace81cc5bc6335efcbcc4ccb9066695e515dbdb3900"}, - {file = "SQLAlchemy-2.0.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:665f0a3954635b5b777a55111ababf44b4fc12b1f3ba0a435b602b6387ffd7cf"}, - {file = "SQLAlchemy-2.0.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecf6d4cda1f9f6cb0b45803a01ea7f034e2f1aed9475e883410812d9f9e3cfcf"}, - {file = "SQLAlchemy-2.0.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c51db269513917394faec5e5c00d6f83829742ba62e2ac4fa5c98d58be91662f"}, - {file = "SQLAlchemy-2.0.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:790f533fa5c8901a62b6fef5811d48980adeb2f51f1290ade8b5e7ba990ba3de"}, - {file = "SQLAlchemy-2.0.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1b1180cda6df7af84fe72e4530f192231b1f29a7496951db4ff38dac1687202d"}, - {file = "SQLAlchemy-2.0.25-cp39-cp39-win32.whl", hash = "sha256:555651adbb503ac7f4cb35834c5e4ae0819aab2cd24857a123370764dc7d7e24"}, - {file = "SQLAlchemy-2.0.25-cp39-cp39-win_amd64.whl", hash = "sha256:dc55990143cbd853a5d038c05e79284baedf3e299661389654551bd02a6a68d7"}, - {file = "SQLAlchemy-2.0.25-py3-none-any.whl", hash = "sha256:a86b4240e67d4753dc3092d9511886795b3c2852abe599cffe108952f7af7ac3"}, - {file = "SQLAlchemy-2.0.25.tar.gz", hash = "sha256:a2c69a7664fb2d54b8682dd774c3b54f67f84fa123cf84dda2a5f40dcaa04e08"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d04e579e911562f1055d26dab1868d3e0bb905db3bccf664ee8ad109f035618a"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa67d821c1fd268a5a87922ef4940442513b4e6c377553506b9db3b83beebbd8"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c7a596d0be71b7baa037f4ac10d5e057d276f65a9a611c46970f012752ebf2d"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:954d9735ee9c3fa74874c830d089a815b7b48df6f6b6e357a74130e478dbd951"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5cd20f58c29bbf2680039ff9f569fa6d21453fbd2fa84dbdb4092f006424c2e6"}, + {file = 
"SQLAlchemy-2.0.27-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:03f448ffb731b48323bda68bcc93152f751436ad6037f18a42b7e16af9e91c07"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-win32.whl", hash = "sha256:d997c5938a08b5e172c30583ba6b8aad657ed9901fc24caf3a7152eeccb2f1b4"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-win_amd64.whl", hash = "sha256:eb15ef40b833f5b2f19eeae65d65e191f039e71790dd565c2af2a3783f72262f"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6c5bad7c60a392850d2f0fee8f355953abaec878c483dd7c3836e0089f046bf6"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3012ab65ea42de1be81fff5fb28d6db893ef978950afc8130ba707179b4284a"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbcd77c4d94b23e0753c5ed8deba8c69f331d4fd83f68bfc9db58bc8983f49cd"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d177b7e82f6dd5e1aebd24d9c3297c70ce09cd1d5d37b43e53f39514379c029c"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:680b9a36029b30cf063698755d277885d4a0eab70a2c7c6e71aab601323cba45"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1306102f6d9e625cebaca3d4c9c8f10588735ef877f0360b5cdb4fdfd3fd7131"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-win32.whl", hash = "sha256:5b78aa9f4f68212248aaf8943d84c0ff0f74efc65a661c2fc68b82d498311fd5"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-win_amd64.whl", hash = "sha256:15e19a84b84528f52a68143439d0c7a3a69befcd4f50b8ef9b7b69d2628ae7c4"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0de1263aac858f288a80b2071990f02082c51d88335a1db0d589237a3435fe71"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce850db091bf7d2a1f2fdb615220b968aeff3849007b1204bf6e3e50a57b3d32"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dfc936870507da96aebb43e664ae3a71a7b96278382bcfe84d277b88e379b18"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4fbe6a766301f2e8a4519f4500fe74ef0a8509a59e07a4085458f26228cd7cc"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4535c49d961fe9a77392e3a630a626af5baa967172d42732b7a43496c8b28876"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0fb3bffc0ced37e5aa4ac2416f56d6d858f46d4da70c09bb731a246e70bff4d5"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-win32.whl", hash = "sha256:7f470327d06400a0aa7926b375b8e8c3c31d335e0884f509fe272b3c700a7254"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-win_amd64.whl", hash = "sha256:f9374e270e2553653d710ece397df67db9d19c60d2647bcd35bfc616f1622dcd"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e97cf143d74a7a5a0f143aa34039b4fecf11343eed66538610debc438685db4a"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7b5a3e2120982b8b6bd1d5d99e3025339f7fb8b8267551c679afb39e9c7c7f1"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e36aa62b765cf9f43a003233a8c2d7ffdeb55bc62eaa0a0380475b228663a38f"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5ada0438f5b74c3952d916c199367c29ee4d6858edff18eab783b3978d0db16d"}, + {file = 
"SQLAlchemy-2.0.27-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b1d9d1bfd96eef3c3faedb73f486c89e44e64e40e5bfec304ee163de01cf996f"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-win32.whl", hash = "sha256:ca891af9f3289d24a490a5fde664ea04fe2f4984cd97e26de7442a4251bd4b7c"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-win_amd64.whl", hash = "sha256:fd8aafda7cdff03b905d4426b714601c0978725a19efc39f5f207b86d188ba01"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ec1f5a328464daf7a1e4e385e4f5652dd9b1d12405075ccba1df842f7774b4fc"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ad862295ad3f644e3c2c0d8b10a988e1600d3123ecb48702d2c0f26771f1c396"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48217be1de7d29a5600b5c513f3f7664b21d32e596d69582be0a94e36b8309cb"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e56afce6431450442f3ab5973156289bd5ec33dd618941283847c9fd5ff06bf"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:611068511b5531304137bcd7fe8117c985d1b828eb86043bd944cebb7fae3910"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b86abba762ecfeea359112b2bb4490802b340850bbee1948f785141a5e020de8"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-win32.whl", hash = "sha256:30d81cc1192dc693d49d5671cd40cdec596b885b0ce3b72f323888ab1c3863d5"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-win_amd64.whl", hash = "sha256:120af1e49d614d2525ac247f6123841589b029c318b9afbfc9e2b70e22e1827d"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d07ee7793f2aeb9b80ec8ceb96bc8cc08a2aec8a1b152da1955d64e4825fcbac"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cb0845e934647232b6ff5150df37ceffd0b67b754b9fdbb095233deebcddbd4a"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fc19ae2e07a067663dd24fca55f8ed06a288384f0e6e3910420bf4b1270cc51"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b90053be91973a6fb6020a6e44382c97739736a5a9d74e08cc29b196639eb979"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2f5c9dfb0b9ab5e3a8a00249534bdd838d943ec4cfb9abe176a6c33408430230"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:33e8bde8fff203de50399b9039c4e14e42d4d227759155c21f8da4a47fc8053c"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-win32.whl", hash = "sha256:d873c21b356bfaf1589b89090a4011e6532582b3a8ea568a00e0c3aab09399dd"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-win_amd64.whl", hash = "sha256:ff2f1b7c963961d41403b650842dc2039175b906ab2093635d8319bef0b7d620"}, + {file = "SQLAlchemy-2.0.27-py3-none-any.whl", hash = "sha256:1ab4e0448018d01b142c916cc7119ca573803a4745cfe341b8f95657812700ac"}, + {file = "SQLAlchemy-2.0.27.tar.gz", hash = "sha256:86a6ed69a71fe6b88bf9331594fa390a2adda4a49b5c06f98e47bf0d392534f8"}, ] [package.dependencies] @@ -3864,24 +3826,6 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] pymysql = ["pymysql"] sqlcipher = ["sqlcipher3_binary"] -[[package]] -name = "sshpubkeys" -version = "3.3.1" -description = "SSH public key parser" -optional = false -python-versions = ">=3" -files = [ - {file = "sshpubkeys-3.3.1-py2.py3-none-any.whl", hash = "sha256:946f76b8fe86704b0e7c56a00d80294e39bc2305999844f079a217885060b1ac"}, - {file = 
"sshpubkeys-3.3.1.tar.gz", hash = "sha256:3020ed4f8c846849299370fbe98ff4157b0ccc1accec105e07cfa9ae4bb55064"}, -] - -[package.dependencies] -cryptography = ">=2.1.4" -ecdsa = ">=0.13" - -[package.extras] -dev = ["twine", "wheel", "yapf"] - [[package]] name = "strictyaml" version = "1.7.3" @@ -3910,6 +3854,20 @@ files = [ [package.dependencies] mpmath = ">=0.19" +[[package]] +name = "tenacity" +version = "8.2.3" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, + {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, +] + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + [[package]] name = "thrift" version = "0.16.0" @@ -3939,6 +3897,26 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "tqdm" +version = "4.66.1" +description = "Fast, Extensible Progress Meter" +optional = true +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, + {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "typing-extensions" version = "4.9.0" @@ -4302,6 +4280,7 @@ cffi = ["cffi (>=1.11)"] [extras] adlfs = ["adlfs"] +daft = ["getdaft"] duckdb = ["duckdb", "pyarrow"] dynamodb = ["boto3"] gcsfs = ["gcsfs"] @@ -4319,4 +4298,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "6d7f598c9ddb0e7565ef8af1b3270c9e5eb32718abfd0a3a7d5c192543150ba8" +content-hash = "5553acdf7ad32ec9dbc74523b9f7ff241907b0d8129a40e3052750c3182e7539" diff --git a/pyiceberg/avro/decoder_fast.pyi b/pyiceberg/avro/decoder_fast.pyi index cf45ce5066..cc367c4764 100644 --- a/pyiceberg/avro/decoder_fast.pyi +++ b/pyiceberg/avro/decoder_fast.pyi @@ -20,37 +20,54 @@ from pyiceberg.avro.decoder import BinaryDecoder class CythonBinaryDecoder(BinaryDecoder): def __init__(self, input_contents: bytes) -> None: pass + def tell(self) -> int: pass + def read(self, n: int) -> bytes: pass + def read_boolean(self) -> bool: pass + def read_int(self) -> int: pass + def read_ints(self, count: int) -> tuple[int, ...]: pass + def read_int_bytes_dict(self, count: int, dest: dict[int, bytes]) -> None: pass + def read_bytes(self) -> bytes: pass + def read_float(self) -> float: pass + def read_double(self) -> float: pass + def read_utf8(self) -> str: pass + def skip(self, n: int) -> None: pass + def skip_int(self) -> None: pass + def skip_boolean(self) -> None: pass + def skip_float(self) -> None: pass + def skip_double(self) -> None: pass + def skip_bytes(self) -> None: pass + def skip_utf8(self) -> None: pass diff --git a/pyiceberg/avro/decoder_fast.pyx b/pyiceberg/avro/decoder_fast.pyx index 182fd0e92e..52caec3308 100644 --- a/pyiceberg/avro/decoder_fast.pyx +++ b/pyiceberg/avro/decoder_fast.pyx @@ -32,9 +32,7 @@ unsigned_long_long_array_template = cython.declare(array.array, array.array('Q', @cython.final cdef class 
CythonBinaryDecoder: - """Implement a BinaryDecoder that reads from an in-memory buffer. - - """ + """Implement a BinaryDecoder that reads from an in-memory buffer.""" # This the data that is duplicated when the decoder is created. cdef unsigned char *_data diff --git a/pyiceberg/avro/file.py b/pyiceberg/avro/file.py index a8befd9f91..2f21e165b4 100644 --- a/pyiceberg/avro/file.py +++ b/pyiceberg/avro/file.py @@ -194,8 +194,7 @@ def _read_block(self) -> int: raise ValueError(f"Expected sync bytes {self.header.sync!r}, but got {sync_marker!r}") block_records = self.decoder.read_int() - block_bytes_len = self.decoder.read_int() - block_bytes = self.decoder.read(block_bytes_len) + block_bytes = self.decoder.read_bytes() if codec := self.header.compression_codec(): block_bytes = codec.decompress(block_bytes) diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index 4559c5cf21..3250b2426a 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -36,7 +36,7 @@ cast, ) -from pyiceberg.exceptions import NoSuchNamespaceError, NoSuchTableError, NotInstalledError +from pyiceberg.exceptions import NoSuchNamespaceError, NoSuchTableError, NotInstalledError, TableAlreadyExistsError from pyiceberg.io import FileIO, load_file_io from pyiceberg.manifest import ManifestFile from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec @@ -323,6 +323,34 @@ def create_table( TableAlreadyExistsError: If a table with the name already exists. """ + def create_table_if_not_exists( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + location: Optional[str] = None, + partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC, + sort_order: SortOrder = UNSORTED_SORT_ORDER, + properties: Properties = EMPTY_DICT, + ) -> Table: + """Create a table if it does not exist. + + Args: + identifier (str | Identifier): Table identifier. + schema (Schema): Table's schema. + location (str | None): Location for the table. Optional Argument. + partition_spec (PartitionSpec): PartitionSpec for the table. + sort_order (SortOrder): SortOrder for the table. + properties (Properties): Table properties that can be a string based dictionary. + + Returns: + Table: the created table instance if the table does not exist, else the existing + table instance. + """ + try: + return self.create_table(identifier, schema, location, partition_spec, sort_order, properties) + except TableAlreadyExistsError: + return self.load_table(identifier) + @abstractmethod def load_table(self, identifier: Union[str, Identifier]) -> Table: """Load the table's metadata and returns the table instance. @@ -393,6 +421,8 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons Raises: NoSuchTableError: If a table with the given identifier does not exist. + CommitFailedException: Requirement not met, or a conflict with a concurrent commit. + CommitStateUnknownException: Failed due to an internal exception on the side of the catalog. """ @abstractmethod diff --git a/pyiceberg/catalog/dynamodb.py b/pyiceberg/catalog/dynamodb.py index d5f3b5e14c..b7b0f3ddb1 100644 --- a/pyiceberg/catalog/dynamodb.py +++ b/pyiceberg/catalog/dynamodb.py @@ -208,6 +208,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons Raises: NoSuchTableError: If a table with the given identifier does not exist. + CommitFailedException: Requirement not met, or a conflict with a concurrent commit. 
""" raise NotImplementedError diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 8f860fabba..089a30ba61 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -85,6 +85,7 @@ StringType, StructType, TimestampType, + TimestamptzType, TimeType, UUIDType, ) @@ -125,6 +126,7 @@ def _construct_parameters( StringType: "string", UUIDType: "string", TimestampType: "timestamp", + TimestamptzType: "timestamp", FixedType: "binary", BinaryType: "binary", } @@ -150,7 +152,7 @@ def primitive(self, primitive: PrimitiveType) -> str: if isinstance(primitive, DecimalType): return f"decimal({primitive.precision},{primitive.scale})" if (primitive_type := type(primitive)) not in GLUE_PRIMITIVE_TYPES: - raise ValueError(f"Unknown primitive type: {primitive}") + return str(primitive_type.root) return GLUE_PRIMITIVE_TYPES[primitive_type] @@ -402,7 +404,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons Raises: NoSuchTableError: If a table with the given identifier does not exist. - CommitFailedException: If the commit failed. + CommitFailedException: Requirement not met, or a conflict with a concurrent commit. """ identifier_tuple = self.identifier_to_tuple_without_catalog( tuple(table_request.identifier.namespace.root + [table_request.identifier.name]) diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 8069321095..c24355f6fb 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import getpass +import socket import time from types import TracebackType from typing import ( @@ -34,10 +35,17 @@ AlreadyExistsException, FieldSchema, InvalidOperationException, + LockComponent, + LockLevel, + LockRequest, + LockResponse, + LockState, + LockType, MetaException, NoSuchObjectException, SerDeInfo, StorageDescriptor, + UnlockRequest, ) from hive_metastore.ttypes import Database as HiveDatabase from hive_metastore.ttypes import Table as HiveTable @@ -56,6 +64,7 @@ PropertiesUpdateSummary, ) from pyiceberg.exceptions import ( + CommitFailedException, NamespaceAlreadyExistsError, NamespaceNotEmptyError, NoSuchIcebergTableError, @@ -67,7 +76,7 @@ from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema, SchemaVisitor, visit from pyiceberg.serializers import FromInputFile -from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table, update_table_metadata +from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table, TableProperties, update_table_metadata from pyiceberg.table.metadata import new_table_metadata from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder from pyiceberg.typedef import EMPTY_DICT @@ -121,17 +130,21 @@ class _HiveClient: _transport: TTransport _client: Client + _ugi: Optional[List[str]] - def __init__(self, uri: str): + def __init__(self, uri: str, ugi: Optional[str] = None): url_parts = urlparse(uri) transport = TSocket.TSocket(url_parts.hostname, url_parts.port) self._transport = TTransport.TBufferedTransport(transport) protocol = TBinaryProtocol.TBinaryProtocol(transport) self._client = Client(protocol) + self._ugi = ugi.split(':') if ugi else None def __enter__(self) -> Client: self._transport.open() + if self._ugi: + self._client.set_ugi(*self._ugi) return self._client def __exit__( @@ -155,7 +168,7 @@ def _construct_hive_storage_descriptor(schema: Schema, location: Optional[str]) PROP_TABLE_TYPE = 
"table_type" PROP_METADATA_LOCATION = "metadata_location" PROP_PREVIOUS_METADATA_LOCATION = "previous_metadata_location" -DEFAULT_PROPERTIES = {'write.parquet.compression-codec': 'zstd'} +DEFAULT_PROPERTIES = {TableProperties.PARQUET_COMPRESSION: TableProperties.PARQUET_COMPRESSION_DEFAULT} def _construct_parameters(metadata_location: str, previous_metadata_location: Optional[str] = None) -> Dict[str, Any]: @@ -224,7 +237,7 @@ class HiveCatalog(Catalog): def __init__(self, name: str, **properties: str): super().__init__(name, **properties) - self._client = _HiveClient(properties["uri"]) + self._client = _HiveClient(properties["uri"], properties.get("ugi")) def _convert_hive_into_iceberg(self, table: HiveTable, io: FileIO) -> Table: properties: Dict[str, str] = table.parameters @@ -331,6 +344,15 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: """ raise NotImplementedError + def _create_lock_request(self, database_name: str, table_name: str) -> LockRequest: + lock_component: LockComponent = LockComponent( + level=LockLevel.TABLE, type=LockType.EXCLUSIVE, dbname=database_name, tablename=table_name, isTransactional=True + ) + + lock_request: LockRequest = LockRequest(component=[lock_component], user=getpass.getuser(), hostname=socket.gethostname()) + + return lock_request + def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse: """Update the table. @@ -342,6 +364,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons Raises: NoSuchTableError: If a table with the given identifier does not exist. + CommitFailedException: Requirement not met, or a conflict with a concurrent commit. """ identifier_tuple = self.identifier_to_tuple_without_catalog( tuple(table_request.identifier.namespace.root + [table_request.identifier.name]) @@ -363,15 +386,23 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons self._write_metadata(updated_metadata, current_table.io, new_metadata_location) # commit to hive - try: - with self._client as open_client: + # https://github.com/apache/hive/blob/master/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift#L1232 + with self._client as open_client: + lock: LockResponse = open_client.lock(self._create_lock_request(database_name, table_name)) + + try: + if lock.state != LockState.ACQUIRED: + raise CommitFailedException(f"Failed to acquire lock for {table_request.identifier}, state: {lock.state}") + tbl = open_client.get_table(dbname=database_name, tbl_name=table_name) tbl.parameters = _construct_parameters( metadata_location=new_metadata_location, previous_metadata_location=current_table.metadata_location ) open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=tbl) - except NoSuchObjectException as e: - raise NoSuchTableError(f"Table does not exist: {table_name}") from e + except NoSuchObjectException as e: + raise NoSuchTableError(f"Table does not exist: {table_name}") from e + finally: + open_client.unlock(UnlockRequest(lockid=lock.lockid)) return CommitTableResponse(metadata=updated_metadata, metadata_location=new_metadata_location) diff --git a/pyiceberg/catalog/rest.py b/pyiceberg/catalog/rest.py index 34d75b5936..2adeafb593 100644 --- a/pyiceberg/catalog/rest.py +++ b/pyiceberg/catalog/rest.py @@ -28,8 +28,9 @@ Union, ) -from pydantic import Field, ValidationError +from pydantic import Field, ValidationError, field_validator from requests import HTTPError, Session +from tenacity import RetryCallState, 
retry, retry_if_exception_type, stop_after_attempt from pyiceberg import __version__ from pyiceberg.catalog import ( @@ -57,8 +58,8 @@ TableAlreadyExistsError, UnauthorizedError, ) -from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec -from pyiceberg.schema import Schema +from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec, assign_fresh_partition_spec_ids +from pyiceberg.schema import Schema, assign_fresh_schema_ids from pyiceberg.table import ( CommitTableRequest, CommitTableResponse, @@ -66,8 +67,9 @@ TableIdentifier, TableMetadata, ) -from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder +from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder, assign_fresh_sort_order_ids from pyiceberg.typedef import EMPTY_DICT, UTF8, IcebergBaseModel +from pyiceberg.types import transform_dict_value_to_str if TYPE_CHECKING: import pyarrow as pa @@ -114,8 +116,22 @@ class Endpoints: SIGV4_REGION = "rest.signing-region" SIGV4_SERVICE = "rest.signing-name" AUTH_URL = "rest.authorization-url" +HEADER_PREFIX = "header." -NAMESPACE_SEPARATOR = b"\x1F".decode(UTF8) +NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8) + + +def _retry_hook(retry_state: RetryCallState) -> None: + rest_catalog: RestCatalog = retry_state.args[0] + rest_catalog._refresh_token() # pylint: disable=protected-access + + +_RETRY_ARGS = { + "retry": retry_if_exception_type(AuthorizationExpiredError), + "stop": stop_after_attempt(2), + "before_sleep": _retry_hook, + "reraise": True, +} class TableResponse(IcebergBaseModel): @@ -132,6 +148,8 @@ class CreateTableRequest(IcebergBaseModel): write_order: Optional[SortOrder] = Field(alias="write-order") stage_create: bool = Field(alias="stage-create", default=False) properties: Properties = Field(default_factory=dict) + # validators + transform_properties_dict_value_to_str = field_validator('properties', mode='before')(transform_dict_value_to_str) class RegisterTableRequest(IcebergBaseModel): @@ -142,8 +160,10 @@ class RegisterTableRequest(IcebergBaseModel): class TokenResponse(IcebergBaseModel): access_token: str = Field() token_type: str = Field() - expires_in: int = Field() - issued_token_type: str = Field() + expires_in: Optional[int] = Field(default=None) + issued_token_type: Optional[str] = Field(default=None) + refresh_token: Optional[str] = Field(default=None) + scope: Optional[str] = Field(default=None) class ConfigResponse(IcebergBaseModel): @@ -217,26 +237,18 @@ def _create_session(self) -> Session: # Sets the client side and server side SSL cert verification, if provided as properties. 
if ssl_config := self.properties.get(SSL): - if ssl_ca_bundle := ssl_config.get(CA_BUNDLE): # type: ignore + if ssl_ca_bundle := ssl_config.get(CA_BUNDLE): session.verify = ssl_ca_bundle - if ssl_client := ssl_config.get(CLIENT): # type: ignore + if ssl_client := ssl_config.get(CLIENT): if all(k in ssl_client for k in (CERT, KEY)): session.cert = (ssl_client[CERT], ssl_client[KEY]) elif ssl_client_cert := ssl_client.get(CERT): session.cert = ssl_client_cert - # If we have credentials, but not a token, we want to fetch a token - if TOKEN not in self.properties and CREDENTIAL in self.properties: - self.properties[TOKEN] = self._fetch_access_token(session, self.properties[CREDENTIAL]) - - # Set Auth token for subsequent calls in the session - if token := self.properties.get(TOKEN): - session.headers[AUTHORIZATION_HEADER] = f"{BEARER_PREFIX} {token}" + self._refresh_token(session, self.properties.get(TOKEN)) # Set HTTP headers - session.headers["Content-type"] = "application/json" - session.headers["X-Client-Version"] = ICEBERG_REST_SPEC_VERSION - session.headers["User-Agent"] = f"PyIceberg/{__version__}" + self._config_headers(session) # Configure SigV4 Request Signing if str(self.properties.get(SIGV4, False)).lower() == "true": @@ -283,8 +295,9 @@ def _fetch_access_token(self, session: Session, credential: str) -> str: else: client_id, client_secret = None, credential data = {GRANT_TYPE: CLIENT_CREDENTIALS, CLIENT_ID: client_id, CLIENT_SECRET: client_secret, SCOPE: CATALOG_SCOPE} - # Uses application/x-www-form-urlencoded by default - response = session.post(url=self.auth_url, data=data) + response = session.post( + url=self.auth_url, data=data, headers={**session.headers, "Content-type": "application/x-www-form-urlencoded"} + ) try: response.raise_for_status() except HTTPError as exc: @@ -438,6 +451,29 @@ def _response_to_table(self, identifier_tuple: Tuple[str, ...], table_response: catalog=self, ) + def _refresh_token(self, session: Optional[Session] = None, initial_token: Optional[str] = None) -> None: + session = session or self._session + if initial_token is not None: + self.properties[TOKEN] = initial_token + elif CREDENTIAL in self.properties: + self.properties[TOKEN] = self._fetch_access_token(session, self.properties[CREDENTIAL]) + + # Set Auth token for subsequent calls in the session + if token := self.properties.get(TOKEN): + session.headers[AUTHORIZATION_HEADER] = f"{BEARER_PREFIX} {token}" + + def _config_headers(self, session: Session) -> None: + session.headers["Content-type"] = "application/json" + session.headers["X-Client-Version"] = ICEBERG_REST_SPEC_VERSION + session.headers["User-Agent"] = f"PyIceberg/{__version__}" + session.headers["X-Iceberg-Access-Delegation"] = "vended-credentials" + header_properties = self._extract_headers_from_properties() + session.headers.update(header_properties) + + def _extract_headers_from_properties(self) -> Dict[str, str]: + return {key[len(HEADER_PREFIX) :]: value for key, value in self.properties.items() if key.startswith(HEADER_PREFIX)} + + @retry(**_RETRY_ARGS) def create_table( self, identifier: Union[str, Identifier], @@ -447,15 +483,18 @@ def create_table( sort_order: SortOrder = UNSORTED_SORT_ORDER, properties: Properties = EMPTY_DICT, ) -> Table: - schema: Schema = self._convert_schema_if_needed(schema) # type: ignore + iceberg_schema = self._convert_schema_if_needed(schema) + fresh_schema = assign_fresh_schema_ids(iceberg_schema) + fresh_partition_spec = assign_fresh_partition_spec_ids(partition_spec, iceberg_schema, 
fresh_schema) + fresh_sort_order = assign_fresh_sort_order_ids(sort_order, iceberg_schema, fresh_schema) namespace_and_table = self._split_identifier_for_path(identifier) request = CreateTableRequest( name=namespace_and_table["table"], location=location, - table_schema=schema, - partition_spec=partition_spec, - write_order=sort_order, + table_schema=fresh_schema, + partition_spec=fresh_partition_spec, + write_order=fresh_sort_order, properties=properties, ) serialized_json = request.model_dump_json().encode(UTF8) @@ -471,6 +510,7 @@ def create_table( table_response = TableResponse(**response.json()) return self._response_to_table(self.identifier_to_tuple(identifier), table_response) + @retry(**_RETRY_ARGS) def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table: """Register a new table using existing metadata. @@ -502,6 +542,7 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: table_response = TableResponse(**response.json()) return self._response_to_table(self.identifier_to_tuple(identifier), table_response) + @retry(**_RETRY_ARGS) def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: namespace_tuple = self._check_valid_namespace_identifier(namespace) namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple) @@ -512,6 +553,7 @@ def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: self._handle_non_200_response(exc, {404: NoSuchNamespaceError}) return [(*table.namespace, table.name) for table in ListTablesResponse(**response.json()).identifiers] + @retry(**_RETRY_ARGS) def load_table(self, identifier: Union[str, Identifier]) -> Table: identifier_tuple = self.identifier_to_tuple_without_catalog(identifier) response = self._session.get( @@ -525,6 +567,7 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table: table_response = TableResponse(**response.json()) return self._response_to_table(identifier_tuple, table_response) + @retry(**_RETRY_ARGS) def drop_table(self, identifier: Union[str, Identifier], purge_requested: bool = False) -> None: identifier_tuple = self.identifier_to_tuple_without_catalog(identifier) response = self._session.delete( @@ -537,9 +580,11 @@ def drop_table(self, identifier: Union[str, Identifier], purge_requested: bool = except HTTPError as exc: self._handle_non_200_response(exc, {404: NoSuchTableError}) + @retry(**_RETRY_ARGS) def purge_table(self, identifier: Union[str, Identifier]) -> None: self.drop_table(identifier=identifier, purge_requested=True) + @retry(**_RETRY_ARGS) def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table: from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier) payload = { @@ -554,6 +599,7 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U return self.load_table(to_identifier) + @retry(**_RETRY_ARGS) def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse: """Update the table. @@ -565,6 +611,8 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons Raises: NoSuchTableError: If a table with the given identifier does not exist. + CommitFailedException: Requirement not met, or a conflict with a concurrent commit. + CommitStateUnknownException: Failed due to an internal exception on the side of the catalog. 
""" response = self._session.post( self.url(Endpoints.update_table, prefixed=True, **self._split_identifier_for_path(table_request.identifier)), @@ -584,6 +632,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons ) return CommitTableResponse(**response.json()) + @retry(**_RETRY_ARGS) def create_namespace(self, namespace: Union[str, Identifier], properties: Properties = EMPTY_DICT) -> None: namespace_tuple = self._check_valid_namespace_identifier(namespace) payload = {"namespace": namespace_tuple, "properties": properties} @@ -593,6 +642,7 @@ def create_namespace(self, namespace: Union[str, Identifier], properties: Proper except HTTPError as exc: self._handle_non_200_response(exc, {404: NoSuchNamespaceError, 409: NamespaceAlreadyExistsError}) + @retry(**_RETRY_ARGS) def drop_namespace(self, namespace: Union[str, Identifier]) -> None: namespace_tuple = self._check_valid_namespace_identifier(namespace) namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) @@ -602,6 +652,7 @@ def drop_namespace(self, namespace: Union[str, Identifier]) -> None: except HTTPError as exc: self._handle_non_200_response(exc, {404: NoSuchNamespaceError}) + @retry(**_RETRY_ARGS) def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identifier]: namespace_tuple = self.identifier_to_tuple(namespace) response = self._session.get( @@ -619,6 +670,7 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identi namespaces = ListNamespaceResponse(**response.json()) return [namespace_tuple + child_namespace for child_namespace in namespaces.namespaces] + @retry(**_RETRY_ARGS) def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties: namespace_tuple = self._check_valid_namespace_identifier(namespace) namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) @@ -630,6 +682,7 @@ def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Proper return NamespaceResponse(**response.json()).properties + @retry(**_RETRY_ARGS) def update_namespace_properties( self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Properties = EMPTY_DICT ) -> PropertiesUpdateSummary: diff --git a/pyiceberg/catalog/sql.py b/pyiceberg/catalog/sql.py index 8a02b20dfc..b6b2feeeb0 100644 --- a/pyiceberg/catalog/sql.py +++ b/pyiceberg/catalog/sql.py @@ -32,7 +32,7 @@ union, update, ) -from sqlalchemy.exc import IntegrityError, NoResultFound, OperationalError +from sqlalchemy.exc import IntegrityError, NoResultFound, OperationalError, ProgrammingError from sqlalchemy.orm import ( DeclarativeBase, Mapped, @@ -101,7 +101,8 @@ def __init__(self, name: str, **properties: str): if not (uri_prop := self.properties.get("uri")): raise NoSuchPropertyException("SQL connection URI is required") - self.engine = create_engine(uri_prop, echo=True) + echo = bool(self.properties.get("echo", False)) + self.engine = create_engine(uri_prop, echo=echo) self._ensure_tables_exist() @@ -111,7 +112,10 @@ def _ensure_tables_exist(self) -> None: stmt = select(1).select_from(table) try: session.scalar(stmt) - except OperationalError: + except ( + OperationalError, + ProgrammingError, + ): # sqlalchemy returns OperationalError in case of sqlite and ProgrammingError with postgres. self.create_tables() return @@ -369,7 +373,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons Raises: NoSuchTableError: If a table with the given identifier does not exist. - CommitFailedException: If the commit failed. 
+ CommitFailedException: Requirement not met, or a conflict with a concurrent commit. """ identifier_tuple = self.identifier_to_tuple_without_catalog( tuple(table_request.identifier.namespace.root + [table_request.identifier.name]) @@ -563,7 +567,9 @@ def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Proper Raises: NoSuchNamespaceError: If a namespace with the given name does not exist. """ - database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) + database_name = self.identifier_to_database(namespace) + if not self._namespace_exists(database_name): + raise NoSuchNamespaceError(f"Database {database_name} does not exists") stmt = select(IcebergNamespaceProperties).where( IcebergNamespaceProperties.catalog_name == self.name, IcebergNamespaceProperties.namespace == database_name diff --git a/pyiceberg/cli/console.py b/pyiceberg/cli/console.py index 092910b5f6..0fbda10960 100644 --- a/pyiceberg/cli/console.py +++ b/pyiceberg/cli/console.py @@ -59,11 +59,22 @@ def wrapper(*args: Any, **kwargs: Any): # type: ignore @click.option("--catalog") @click.option("--verbose", type=click.BOOL) @click.option("--output", type=click.Choice(["text", "json"]), default="text") +@click.option("--ugi") @click.option("--uri") @click.option("--credential") @click.pass_context -def run(ctx: Context, catalog: Optional[str], verbose: bool, output: str, uri: Optional[str], credential: Optional[str]) -> None: +def run( + ctx: Context, + catalog: Optional[str], + verbose: bool, + output: str, + ugi: Optional[str], + uri: Optional[str], + credential: Optional[str], +) -> None: properties = {} + if ugi: + properties["ugi"] = ugi if uri: properties["uri"] = uri if credential: @@ -206,6 +217,23 @@ def version(ctx: Context) -> None: ctx.obj["output"].version(__version__) +@run.group() +def create() -> None: + """Operation to create a namespace.""" + + +@create.command() +@click.argument("identifier") +@click.pass_context +@catch_exception() +def namespace(ctx: Context, identifier: str) -> None: + """Create a namespace.""" + catalog, output = _catalog_and_output(ctx) + + catalog.create_namespace(identifier) + output.text(f"Created namespace: {identifier}") + + @run.group() def drop() -> None: """Operations to drop a namespace or table.""" @@ -223,11 +251,11 @@ def table(ctx: Context, identifier: str) -> None: # noqa: F811 output.text(f"Dropped table: {identifier}") -@drop.command() +@drop.command() # type: ignore @click.argument("identifier") @click.pass_context @catch_exception() -def namespace(ctx: Context, identifier: str) -> None: +def namespace(ctx: Context, identifier: str) -> None: # noqa: F811 """Drop a namespace.""" catalog, output = _catalog_and_output(ctx) diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py index b523deff48..2a03a4de35 100644 --- a/pyiceberg/conversions.py +++ b/pyiceberg/conversions.py @@ -267,7 +267,7 @@ def _(primitive_type: DecimalType, value: Decimal) -> bytes: return decimal_to_bytes(value) -@singledispatch +@singledispatch # type: ignore def from_bytes(primitive_type: PrimitiveType, b: bytes) -> L: # type: ignore """Convert bytes to a built-in python value. 
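The catalog changes above introduce `create_table_if_not_exists`, which catches `TableAlreadyExistsError` and returns the existing table instead of failing. A minimal usage sketch, assuming a catalog named `default` is already configured and using an illustrative two-column schema (neither the catalog name nor the table identifier comes from this patch):

```python
from pyiceberg.catalog import load_catalog
from pyiceberg.schema import Schema
from pyiceberg.types import LongType, NestedField, StringType

# Assumption: a catalog called "default" is configured (e.g. in ~/.pyiceberg.yaml).
catalog = load_catalog("default")

schema = Schema(
    NestedField(field_id=1, name="id", field_type=LongType(), required=True),
    NestedField(field_id=2, name="name", field_type=StringType(), required=False),
)

# Returns the newly created table, or loads and returns the existing one if the
# identifier is already taken, so repeated runs are idempotent.
table = catalog.create_table_if_not_exists("default.events", schema=schema)
```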
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 7a94ce4c7d..be944ffb36 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -26,6 +26,7 @@ from __future__ import annotations import concurrent.futures +import fnmatch import itertools import logging import os @@ -123,7 +124,8 @@ visit, visit_with_partner, ) -from pyiceberg.table import WriteTask +from pyiceberg.table import PropertyUtil, TableProperties, WriteTask +from pyiceberg.table.metadata import TableMetadata from pyiceberg.table.name_mapping import NameMapping from pyiceberg.transforms import TruncateTransform from pyiceberg.typedef import EMPTY_DICT, Properties, Record @@ -532,7 +534,7 @@ def visit_uuid(self, _: UUIDType) -> pa.DataType: return pa.binary(16) def visit_binary(self, _: BinaryType) -> pa.DataType: - return pa.binary() + return pa.large_binary() def _convert_scalar(value: Any, iceberg_type: IcebergType) -> pa.scalar: @@ -654,6 +656,10 @@ def pyarrow_to_schema(schema: pa.Schema, name_mapping: Optional[NameMapping] = N return visit_pyarrow(schema, visitor) +def _pyarrow_to_schema_without_ids(schema: pa.Schema) -> Schema: + return visit_pyarrow(schema, _ConvertToIcebergWithoutIDs()) + + @singledispatch def visit_pyarrow(obj: Union[pa.DataType, pa.Schema], visitor: PyArrowSchemaVisitor[T]) -> T: """Apply a pyarrow schema visitor to any point within a schema. @@ -689,7 +695,9 @@ def _(obj: pa.StructType, visitor: PyArrowSchemaVisitor[T]) -> T: @visit_pyarrow.register(pa.ListType) -def _(obj: pa.ListType, visitor: PyArrowSchemaVisitor[T]) -> T: +@visit_pyarrow.register(pa.FixedSizeListType) +@visit_pyarrow.register(pa.LargeListType) +def _(obj: Union[pa.ListType, pa.LargeListType, pa.FixedSizeListType], visitor: PyArrowSchemaVisitor[T]) -> T: visitor.before_list_element(obj.value_field) result = visit_pyarrow(obj.value_type, visitor) visitor.after_list_element(obj.value_field) @@ -811,12 +819,9 @@ def __init__(self, name_mapping: Optional[NameMapping] = None) -> None: self._field_names = [] self._name_mapping = name_mapping - def _current_path(self) -> str: - return ".".join(self._field_names) - def _field_id(self, field: pa.Field) -> int: if self._name_mapping: - return self._name_mapping.find(self._current_path()).field_id + return self._name_mapping.find(*self._field_names).field_id elif (field_id := _get_field_id(field)) is not None: return field_id else: @@ -855,10 +860,15 @@ def map(self, map_type: pa.MapType, key_result: IcebergType, value_result: Icebe def primitive(self, primitive: pa.DataType) -> PrimitiveType: if pa.types.is_boolean(primitive): return BooleanType() - elif pa.types.is_int32(primitive): - return IntegerType() - elif pa.types.is_int64(primitive): - return LongType() + elif pa.types.is_integer(primitive): + width = primitive.bit_width + if width <= 32: + return IntegerType() + elif width <= 64: + return LongType() + else: + # Does not exist (yet) + raise TypeError(f"Unsupported integer type: {primitive}") elif pa.types.is_float32(primitive): return FloatType() elif pa.types.is_float64(primitive): @@ -866,7 +876,7 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType: elif isinstance(primitive, pa.Decimal128Type): primitive = cast(pa.Decimal128Type, primitive) return DecimalType(primitive.precision, primitive.scale) - elif pa.types.is_string(primitive): + elif pa.types.is_string(primitive) or pa.types.is_large_string(primitive): return StringType() elif pa.types.is_date32(primitive): return DateType() @@ -879,7 +889,7 @@ def primitive(self, primitive: 
pa.DataType) -> PrimitiveType: return TimestamptzType() elif primitive.tz is None: return TimestampType() - elif pa.types.is_binary(primitive): + elif pa.types.is_binary(primitive) or pa.types.is_large_binary(primitive): return BinaryType() elif pa.types.is_fixed_size_binary(primitive): primitive = cast(pa.FixedSizeBinaryType, primitive) @@ -1333,13 +1343,22 @@ def __init__(self, iceberg_type: PrimitiveType, physical_type_string: str, trunc def serialize(self, value: Any) -> bytes: return to_bytes(self.primitive_type, value) - def update_min(self, val: Any) -> None: - self.current_min = val if self.current_min is None else min(val, self.current_min) + def update_min(self, val: Optional[Any]) -> None: + if self.current_min is None: + self.current_min = val + elif val is not None: + self.current_min = min(val, self.current_min) - def update_max(self, val: Any) -> None: - self.current_max = val if self.current_max is None else max(val, self.current_max) + def update_max(self, val: Optional[Any]) -> None: + if self.current_max is None: + self.current_max = val + elif val is not None: + self.current_max = max(val, self.current_max) + + def min_as_bytes(self) -> Optional[bytes]: + if self.current_min is None: + return None - def min_as_bytes(self) -> bytes: return self.serialize( self.current_min if self.trunc_length is None @@ -1377,19 +1396,12 @@ class MetricModeTypes(Enum): FULL = "full" -DEFAULT_METRICS_MODE_KEY = "write.metadata.metrics.default" -COLUMN_METRICS_MODE_KEY_PREFIX = "write.metadata.metrics.column" - - @dataclass(frozen=True) class MetricsMode(Singleton): type: MetricModeTypes length: Optional[int] = None -_DEFAULT_METRICS_MODE = MetricsMode(MetricModeTypes.TRUNCATE, DEFAULT_TRUNCATION_LENGTH) - - def match_metrics_mode(mode: str) -> MetricsMode: sanitized_mode = mode.strip().lower() if sanitized_mode.startswith("truncate"): @@ -1423,12 +1435,14 @@ class PyArrowStatisticsCollector(PreOrderSchemaVisitor[List[StatisticsCollector] _field_id: int = 0 _schema: Schema _properties: Dict[str, str] - _default_mode: Optional[str] + _default_mode: str def __init__(self, schema: Schema, properties: Dict[str, str]): self._schema = schema self._properties = properties - self._default_mode = self._properties.get(DEFAULT_METRICS_MODE_KEY) + self._default_mode = self._properties.get( + TableProperties.DEFAULT_WRITE_METRICS_MODE, TableProperties.DEFAULT_WRITE_METRICS_MODE_DEFAULT + ) def schema(self, schema: Schema, struct_result: Callable[[], List[StatisticsCollector]]) -> List[StatisticsCollector]: return struct_result() @@ -1463,12 +1477,9 @@ def primitive(self, primitive: PrimitiveType) -> List[StatisticsCollector]: if column_name is None: return [] - metrics_mode = _DEFAULT_METRICS_MODE - - if self._default_mode: - metrics_mode = match_metrics_mode(self._default_mode) + metrics_mode = match_metrics_mode(self._default_mode) - col_mode = self._properties.get(f"{COLUMN_METRICS_MODE_KEY_PREFIX}.{column_name}") + col_mode = self._properties.get(f"{TableProperties.METRICS_MODE_COLUMN_CONF_PREFIX}.{column_name}") if col_mode: metrics_mode = match_metrics_mode(col_mode) @@ -1710,7 +1721,7 @@ def fill_parquet_file_metadata( data_file.split_offsets = split_offsets -def write_file(table: Table, tasks: Iterator[WriteTask]) -> Iterator[DataFile]: +def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteTask]) -> Iterator[DataFile]: task = next(tasks) try: @@ -1720,14 +1731,21 @@ def write_file(table: Table, tasks: Iterator[WriteTask]) -> Iterator[DataFile]: except StopIteration: 
pass - file_path = f'{table.location()}/data/{task.generate_data_file_filename("parquet")}' - file_schema = schema_to_pyarrow(table.schema()) + parquet_writer_kwargs = _get_parquet_writer_kwargs(table_metadata.properties) - collected_metrics: List[pq.FileMetaData] = [] - fo = table.io.new_output(file_path) + file_path = f'{table_metadata.location}/data/{task.generate_data_file_filename("parquet")}' + schema = table_metadata.schema() + arrow_file_schema = schema_to_pyarrow(schema) + + fo = io.new_output(file_path) + row_group_size = PropertyUtil.property_as_int( + properties=table_metadata.properties, + property_name=TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES, + default=TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT, + ) with fo.create(overwrite=True) as fos: - with pq.ParquetWriter(fos, schema=file_schema, version="1.0", metadata_collector=collected_metrics) as writer: - writer.write_table(task.df) + with pq.ParquetWriter(fos, schema=arrow_file_schema, **parquet_writer_kwargs) as writer: + writer.write_table(task.df, row_group_size=row_group_size) data_file = DataFile( content=DataFileContent.DATA, @@ -1735,21 +1753,64 @@ def write_file(table: Table, tasks: Iterator[WriteTask]) -> Iterator[DataFile]: file_format=FileFormat.PARQUET, partition=Record(), file_size_in_bytes=len(fo), - sort_order_id=task.sort_order_id, + # After this has been fixed: + # https://github.com/apache/iceberg-python/issues/271 + # sort_order_id=task.sort_order_id, + sort_order_id=None, # Just copy these from the table for now - spec_id=table.spec().spec_id, + spec_id=table_metadata.default_spec_id, equality_ids=None, key_metadata=None, ) - if len(collected_metrics) != 1: - # One file has been written - raise ValueError(f"Expected 1 entry, got: {collected_metrics}") - fill_parquet_file_metadata( data_file=data_file, - parquet_metadata=collected_metrics[0], - stats_columns=compute_statistics_plan(table.schema(), table.properties), - parquet_column_mapping=parquet_path_to_id_mapping(table.schema()), + parquet_metadata=writer.writer.metadata, + stats_columns=compute_statistics_plan(schema, table_metadata.properties), + parquet_column_mapping=parquet_path_to_id_mapping(schema), ) return iter([data_file]) + + +ICEBERG_UNCOMPRESSED_CODEC = "uncompressed" +PYARROW_UNCOMPRESSED_CODEC = "none" + + +def _get_parquet_writer_kwargs(table_properties: Properties) -> Dict[str, Any]: + for key_pattern in [ + TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES, + TableProperties.PARQUET_PAGE_ROW_LIMIT, + TableProperties.PARQUET_BLOOM_FILTER_MAX_BYTES, + f"{TableProperties.PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX}.*", + ]: + if unsupported_keys := fnmatch.filter(table_properties, key_pattern): + raise NotImplementedError(f"Parquet writer option(s) {unsupported_keys} not implemented") + + compression_codec = table_properties.get(TableProperties.PARQUET_COMPRESSION, TableProperties.PARQUET_COMPRESSION_DEFAULT) + compression_level = PropertyUtil.property_as_int( + properties=table_properties, + property_name=TableProperties.PARQUET_COMPRESSION_LEVEL, + default=TableProperties.PARQUET_COMPRESSION_LEVEL_DEFAULT, + ) + if compression_codec == ICEBERG_UNCOMPRESSED_CODEC: + compression_codec = PYARROW_UNCOMPRESSED_CODEC + + return { + "compression": compression_codec, + "compression_level": compression_level, + "data_page_size": PropertyUtil.property_as_int( + properties=table_properties, + property_name=TableProperties.PARQUET_PAGE_SIZE_BYTES, + default=TableProperties.PARQUET_PAGE_SIZE_BYTES_DEFAULT, + ), + "dictionary_pagesize_limit": 
PropertyUtil.property_as_int( + properties=table_properties, + property_name=TableProperties.PARQUET_DICT_SIZE_BYTES, + default=TableProperties.PARQUET_DICT_SIZE_BYTES_DEFAULT, + ), + "write_batch_size": PropertyUtil.property_as_int( + properties=table_properties, + property_name=TableProperties.PARQUET_PAGE_ROW_LIMIT, + default=TableProperties.PARQUET_PAGE_ROW_LIMIT_DEFAULT, + ), + } diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py index f6307f0f8c..6fa0286282 100644 --- a/pyiceberg/partitioning.py +++ b/pyiceberg/partitioning.py @@ -16,14 +16,21 @@ # under the License. from __future__ import annotations -from functools import cached_property +import uuid +from abc import ABC, abstractmethod +from dataclasses import dataclass +from datetime import date, datetime +from functools import cached_property, singledispatch from typing import ( Any, Dict, + Generic, List, Optional, Tuple, + TypeVar, ) +from urllib.parse import quote from pydantic import ( BeforeValidator, @@ -34,9 +41,30 @@ from typing_extensions import Annotated from pyiceberg.schema import Schema -from pyiceberg.transforms import Transform, parse_transform -from pyiceberg.typedef import IcebergBaseModel -from pyiceberg.types import NestedField, StructType +from pyiceberg.transforms import ( + BucketTransform, + DayTransform, + HourTransform, + IdentityTransform, + Transform, + TruncateTransform, + UnknownTransform, + VoidTransform, + YearTransform, + parse_transform, +) +from pyiceberg.typedef import IcebergBaseModel, Record +from pyiceberg.types import ( + DateType, + IcebergType, + NestedField, + PrimitiveType, + StructType, + TimestampType, + TimestamptzType, + UUIDType, +) +from pyiceberg.utils.datetime import date_to_days, datetime_to_micros INITIAL_PARTITION_SPEC_ID = 0 PARTITION_FIELD_ID_START: int = 1000 @@ -143,7 +171,7 @@ def is_unpartitioned(self) -> bool: def last_assigned_field_id(self) -> int: if self.fields: return max(pf.field_id for pf in self.fields) - return PARTITION_FIELD_ID_START + return PARTITION_FIELD_ID_START - 1 @cached_property def source_id_to_fields_map(self) -> Dict[int, List[PartitionField]]: @@ -193,6 +221,23 @@ def partition_type(self, schema: Schema) -> StructType: nested_fields.append(NestedField(field.field_id, field.name, result_type, required=False)) return StructType(*nested_fields) + def partition_to_path(self, data: Record, schema: Schema) -> str: + partition_type = self.partition_type(schema) + field_types = partition_type.fields + + field_strs = [] + value_strs = [] + for pos, value in enumerate(data.record_fields()): + partition_field = self.fields[pos] + value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=value) + + value_str = quote(value_str, safe='') + value_strs.append(value_str) + field_strs.append(partition_field.name) + + path = "/".join([field_str + "=" + value_str for field_str, value_str in zip(field_strs, value_strs)]) + return path + UNPARTITIONED_PARTITION_SPEC = PartitionSpec(spec_id=0) @@ -215,3 +260,164 @@ def assign_fresh_partition_spec_ids(spec: PartitionSpec, old_schema: Schema, fre ) ) return PartitionSpec(*partition_fields, spec_id=INITIAL_PARTITION_SPEC_ID) + + +T = TypeVar("T") + + +class PartitionSpecVisitor(Generic[T], ABC): + @abstractmethod + def identity(self, field_id: int, source_name: str, source_id: int) -> T: + """Visit identity partition field.""" + + @abstractmethod + def bucket(self, field_id: int, source_name: str, source_id: int, num_buckets: int) -> T: + """Visit bucket partition field.""" 
+ + @abstractmethod + def truncate(self, field_id: int, source_name: str, source_id: int, width: int) -> T: + """Visit truncate partition field.""" + + @abstractmethod + def year(self, field_id: int, source_name: str, source_id: int) -> T: + """Visit year partition field.""" + + @abstractmethod + def month(self, field_id: int, source_name: str, source_id: int) -> T: + """Visit month partition field.""" + + @abstractmethod + def day(self, field_id: int, source_name: str, source_id: int) -> T: + """Visit day partition field.""" + + @abstractmethod + def hour(self, field_id: int, source_name: str, source_id: int) -> T: + """Visit hour partition field.""" + + @abstractmethod + def always_null(self, field_id: int, source_name: str, source_id: int) -> T: + """Visit void partition field.""" + + @abstractmethod + def unknown(self, field_id: int, source_name: str, source_id: int, transform: str) -> T: + """Visit unknown partition field.""" + raise ValueError(f"Unknown transform is not supported: {transform}") + + +class _PartitionNameGenerator(PartitionSpecVisitor[str]): + def identity(self, field_id: int, source_name: str, source_id: int) -> str: + return source_name + + def bucket(self, field_id: int, source_name: str, source_id: int, num_buckets: int) -> str: + return f"{source_name}_bucket_{num_buckets}" + + def truncate(self, field_id: int, source_name: str, source_id: int, width: int) -> str: + return source_name + "_trunc_" + str(width) + + def year(self, field_id: int, source_name: str, source_id: int) -> str: + return source_name + "_year" + + def month(self, field_id: int, source_name: str, source_id: int) -> str: + return source_name + "_month" + + def day(self, field_id: int, source_name: str, source_id: int) -> str: + return source_name + "_day" + + def hour(self, field_id: int, source_name: str, source_id: int) -> str: + return source_name + "_hour" + + def always_null(self, field_id: int, source_name: str, source_id: int) -> str: + return source_name + "_null" + + def unknown(self, field_id: int, source_name: str, source_id: int, transform: str) -> str: + return super().unknown(field_id, source_name, source_id, transform) + + +R = TypeVar("R") + + +@singledispatch +def _visit(spec: PartitionSpec, schema: Schema, visitor: PartitionSpecVisitor[R]) -> List[R]: + return [_visit_partition_field(schema, field, visitor) for field in spec.fields] + + +def _visit_partition_field(schema: Schema, field: PartitionField, visitor: PartitionSpecVisitor[R]) -> R: + source_name = schema.find_column_name(field.source_id) + if not source_name: + raise ValueError(f"Could not find field with id {field.source_id}") + + transform = field.transform + if isinstance(transform, IdentityTransform): + return visitor.identity(field.field_id, source_name, field.source_id) + elif isinstance(transform, BucketTransform): + return visitor.bucket(field.field_id, source_name, field.source_id, transform.num_buckets) + elif isinstance(transform, TruncateTransform): + return visitor.truncate(field.field_id, source_name, field.source_id, transform.width) + elif isinstance(transform, DayTransform): + return visitor.day(field.field_id, source_name, field.source_id) + elif isinstance(transform, HourTransform): + return visitor.hour(field.field_id, source_name, field.source_id) + elif isinstance(transform, YearTransform): + return visitor.year(field.field_id, source_name, field.source_id) + elif isinstance(transform, VoidTransform): + return visitor.always_null(field.field_id, source_name, field.source_id) + elif 
isinstance(transform, UnknownTransform): + return visitor.unknown(field.field_id, source_name, field.source_id, repr(transform)) + else: + raise ValueError(f"Unknown transform {transform}") + + +@dataclass(frozen=True) +class PartitionFieldValue: + field: PartitionField + value: Any + + +@dataclass(frozen=True) +class PartitionKey: + raw_partition_field_values: List[PartitionFieldValue] + partition_spec: PartitionSpec + schema: Schema + + @cached_property + def partition(self) -> Record: # partition key transformed with iceberg internal representation as input + iceberg_typed_key_values = {} + for raw_partition_field_value in self.raw_partition_field_values: + partition_fields = self.partition_spec.source_id_to_fields_map[raw_partition_field_value.field.source_id] + if len(partition_fields) != 1: + raise ValueError("partition_fields must contain exactly one field.") + partition_field = partition_fields[0] + iceberg_type = self.schema.find_field(name_or_id=raw_partition_field_value.field.source_id).field_type + iceberg_typed_value = _to_partition_representation(iceberg_type, raw_partition_field_value.value) + transformed_value = partition_field.transform.transform(iceberg_type)(iceberg_typed_value) + iceberg_typed_key_values[partition_field.name] = transformed_value + return Record(**iceberg_typed_key_values) + + def to_path(self) -> str: + return self.partition_spec.partition_to_path(self.partition, self.schema) + + +@singledispatch +def _to_partition_representation(type: IcebergType, value: Any) -> Any: + return TypeError(f"Unsupported partition field type: {type}") + + +@_to_partition_representation.register(TimestampType) +@_to_partition_representation.register(TimestamptzType) +def _(type: IcebergType, value: Optional[datetime]) -> Optional[int]: + return datetime_to_micros(value) if value is not None else None + + +@_to_partition_representation.register(DateType) +def _(type: IcebergType, value: Optional[date]) -> Optional[int]: + return date_to_days(value) if value is not None else None + + +@_to_partition_representation.register(UUIDType) +def _(type: IcebergType, value: Optional[uuid.UUID]) -> Optional[str]: + return str(value) if value is not None else None + + +@_to_partition_representation.register(PrimitiveType) +def _(type: IcebergType, value: Optional[Any]) -> Optional[Any]: + return value diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py index 6dd174f325..e805895a7b 100644 --- a/pyiceberg/schema.py +++ b/pyiceberg/schema.py @@ -22,6 +22,7 @@ from dataclasses import dataclass from functools import cached_property, partial, singledispatch from typing import ( + TYPE_CHECKING, Any, Callable, Dict, @@ -62,6 +63,11 @@ UUIDType, ) +if TYPE_CHECKING: + from pyiceberg.table.name_mapping import ( + NameMapping, + ) + T = TypeVar("T") P = TypeVar("P") @@ -221,6 +227,12 @@ def find_type(self, name_or_id: Union[str, int], case_sensitive: bool = True) -> def highest_field_id(self) -> int: return max(self._lazy_id_to_name.keys(), default=0) + @cached_property + def name_mapping(self) -> NameMapping: + from pyiceberg.table.name_mapping import create_mapping_from_schema + + return create_mapping_from_schema(self) + def find_column_name(self, column_id: int) -> Optional[str]: """Find a column name given a column ID. 
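The partitioning additions (`PartitionFieldValue`, `PartitionKey`, and `PartitionSpec.partition_to_path`) turn raw column values into transformed partition values and a Hive-style partition path. A sketch under assumptions: the schema, spec, and timestamp below are invented for illustration and are not part of this change.

```python
from datetime import datetime

from pyiceberg.partitioning import PartitionField, PartitionFieldValue, PartitionKey, PartitionSpec
from pyiceberg.schema import Schema
from pyiceberg.transforms import DayTransform
from pyiceberg.types import NestedField, TimestampType

schema = Schema(NestedField(field_id=1, name="event_ts", field_type=TimestampType(), required=False))
spec = PartitionSpec(
    PartitionField(source_id=1, field_id=1000, transform=DayTransform(), name="event_ts_day")
)

key = PartitionKey(
    raw_partition_field_values=[PartitionFieldValue(spec.fields[0], datetime(2024, 3, 1, 12, 0))],
    partition_spec=spec,
    schema=schema,
)

# .partition converts the datetime to Iceberg's internal representation (micros)
# and applies DayTransform; .to_path() then renders something like
# "event_ts_day=2024-03-01" via PartitionSpec.partition_to_path().
print(key.to_path())
```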
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 26eecefd0f..1a4183c914 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -31,6 +31,7 @@ Any, Callable, Dict, + Generic, Iterable, List, Literal, @@ -51,6 +52,7 @@ And, BooleanExpression, EqualTo, + Reference, parser, visitors, ) @@ -67,7 +69,15 @@ write_manifest, write_manifest_list, ) -from pyiceberg.partitioning import PartitionSpec +from pyiceberg.partitioning import ( + INITIAL_PARTITION_SPEC_ID, + PARTITION_FIELD_ID_START, + IdentityTransform, + PartitionField, + PartitionSpec, + _PartitionNameGenerator, + _visit_partition_field, +) from pyiceberg.schema import ( PartnerAccessor, Schema, @@ -85,10 +95,9 @@ TableMetadataUtil, ) from pyiceberg.table.name_mapping import ( - SCHEMA_NAME_MAPPING_DEFAULT, NameMapping, - create_mapping_from_schema, parse_mapping_from_json, + update_mapping, ) from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef from pyiceberg.table.snapshots import ( @@ -100,6 +109,7 @@ update_snapshot_summaries, ) from pyiceberg.table.sorting import SortOrder +from pyiceberg.transforms import TimeTransform, Transform, VoidTransform from pyiceberg.typedef import ( EMPTY_DICT, IcebergBaseModel, @@ -120,6 +130,7 @@ from pyiceberg.utils.datetime import datetime_to_millis if TYPE_CHECKING: + import daft import pandas as pd import pyarrow as pa import ray @@ -127,26 +138,116 @@ from pyiceberg.catalog import Catalog + ALWAYS_TRUE = AlwaysTrue() TABLE_ROOT_ID = -1 _JAVA_LONG_MAX = 9223372036854775807 +def _check_schema(table_schema: Schema, other_schema: "pa.Schema") -> None: + from pyiceberg.io.pyarrow import _pyarrow_to_schema_without_ids, pyarrow_to_schema + + name_mapping = table_schema.name_mapping + try: + task_schema = pyarrow_to_schema(other_schema, name_mapping=name_mapping) + except ValueError as e: + other_schema = _pyarrow_to_schema_without_ids(other_schema) + additional_names = set(other_schema.column_names) - set(table_schema.column_names) + raise ValueError( + f"PyArrow table contains more columns: {', '.join(sorted(additional_names))}. Update the schema first (hint, use union_by_name)." 
+ ) from e + + if table_schema.as_struct() != task_schema.as_struct(): + from rich.console import Console + from rich.table import Table as RichTable + + console = Console(record=True) + + rich_table = RichTable(show_header=True, header_style="bold") + rich_table.add_column("") + rich_table.add_column("Table field") + rich_table.add_column("Dataframe field") + + for lhs in table_schema.fields: + try: + rhs = task_schema.find_field(lhs.field_id) + rich_table.add_row("✅" if lhs == rhs else "❌", str(lhs), str(rhs)) + except ValueError: + rich_table.add_row("❌", str(lhs), "Missing") + + console.print(rich_table) + raise ValueError(f"Mismatch in fields:\n{console.export_text()}") + + +class TableProperties: + PARQUET_ROW_GROUP_SIZE_BYTES = "write.parquet.row-group-size-bytes" + PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT = 128 * 1024 * 1024 # 128 MB + + PARQUET_ROW_GROUP_LIMIT = "write.parquet.row-group-limit" + PARQUET_ROW_GROUP_LIMIT_DEFAULT = 128 * 1024 * 1024 # 128 MB + + PARQUET_PAGE_SIZE_BYTES = "write.parquet.page-size-bytes" + PARQUET_PAGE_SIZE_BYTES_DEFAULT = 1024 * 1024 # 1 MB + + PARQUET_PAGE_ROW_LIMIT = "write.parquet.page-row-limit" + PARQUET_PAGE_ROW_LIMIT_DEFAULT = 20000 + + PARQUET_DICT_SIZE_BYTES = "write.parquet.dict-size-bytes" + PARQUET_DICT_SIZE_BYTES_DEFAULT = 2 * 1024 * 1024 # 2 MB + + PARQUET_COMPRESSION = "write.parquet.compression-codec" + PARQUET_COMPRESSION_DEFAULT = "zstd" + + PARQUET_COMPRESSION_LEVEL = "write.parquet.compression-level" + PARQUET_COMPRESSION_LEVEL_DEFAULT = None + + PARQUET_BLOOM_FILTER_MAX_BYTES = "write.parquet.bloom-filter-max-bytes" + PARQUET_BLOOM_FILTER_MAX_BYTES_DEFAULT = 1024 * 1024 + + PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX = "write.parquet.bloom-filter-enabled.column" + + DEFAULT_WRITE_METRICS_MODE = "write.metadata.metrics.default" + DEFAULT_WRITE_METRICS_MODE_DEFAULT = "truncate(16)" + + METRICS_MODE_COLUMN_CONF_PREFIX = "write.metadata.metrics.column" + + DEFAULT_NAME_MAPPING = "schema.name-mapping.default" + FORMAT_VERSION = "format-version" + DEFAULT_FORMAT_VERSION = 2 + + +class PropertyUtil: + @staticmethod + def property_as_int(properties: Dict[str, str], property_name: str, default: Optional[int] = None) -> Optional[int]: + if value := properties.get(property_name): + try: + return int(value) + except ValueError as e: + raise ValueError(f"Could not parse table property {property_name} to an integer: {value}") from e + else: + return default + + class Transaction: _table: Table + table_metadata: TableMetadata + _autocommit: bool _updates: Tuple[TableUpdate, ...] _requirements: Tuple[TableRequirement, ...] - def __init__( - self, - table: Table, - actions: Optional[Tuple[TableUpdate, ...]] = None, - requirements: Optional[Tuple[TableRequirement, ...]] = None, - ): + def __init__(self, table: Table, autocommit: bool = False): + """Open a transaction to stage and commit changes to a table. + + Args: + table: The table that will be altered. + autocommit: Option to automatically commit the changes when they are staged. 
+ """ + self.table_metadata = table.metadata self._table = table - self._updates = actions or () - self._requirements = requirements or () + self._autocommit = autocommit + self._updates = () + self._requirements = () def __enter__(self) -> Transaction: """Start a transaction to update the table.""" @@ -154,49 +255,23 @@ def __enter__(self) -> Transaction: def __exit__(self, _: Any, value: Any, traceback: Any) -> None: """Close and commit the transaction.""" - fresh_table = self.commit_transaction() - # Update the new data in place - self._table.metadata = fresh_table.metadata - self._table.metadata_location = fresh_table.metadata_location + self.commit_transaction() - def _append_updates(self, *new_updates: TableUpdate) -> Transaction: - """Append updates to the set of staged updates. - - Args: - *new_updates: Any new updates. - - Raises: - ValueError: When the type of update is not unique. - - Returns: - Transaction object with the new updates appended. - """ - for new_update in new_updates: - # explicitly get type of new_update as new_update is an instantiated class - type_new_update = type(new_update) - if any(isinstance(update, type_new_update) for update in self._updates): - raise ValueError(f"Updates in a single commit need to be unique, duplicate: {type_new_update}") - self._updates = self._updates + new_updates - return self + def _apply(self, updates: Tuple[TableUpdate, ...], requirements: Tuple[TableRequirement, ...] = ()) -> Transaction: + """Check if the requirements are met, and applies the updates to the metadata.""" + for requirement in requirements: + requirement.validate(self.table_metadata) - def _append_requirements(self, *new_requirements: TableRequirement) -> Transaction: - """Append requirements to the set of staged requirements. + self._updates += updates + self._requirements += requirements - Args: - *new_requirements: Any new requirements. + self.table_metadata = update_table_metadata(self.table_metadata, updates) - Raises: - ValueError: When the type of requirement is not unique. + if self._autocommit: + self.commit_transaction() + self._updates = () + self._requirements = () - Returns: - Transaction object with the new requirements appended. - """ - for new_requirement in new_requirements: - # explicitly get type of new_update as requirement is an instantiated class - type_new_requirement = type(new_requirement) - if any(isinstance(requirement, type_new_requirement) for requirement in self._requirements): - raise ValueError(f"Requirements in a single commit need to be unique, duplicate: {type_new_requirement}") - self._requirements = self._requirements + new_requirements return self def upgrade_table_version(self, format_version: Literal[1, 2]) -> Transaction: @@ -213,10 +288,11 @@ def upgrade_table_version(self, format_version: Literal[1, 2]) -> Transaction: if format_version < self._table.metadata.format_version: raise ValueError(f"Cannot downgrade v{self._table.metadata.format_version} table to v{format_version}") + if format_version > self._table.metadata.format_version: - return self._append_updates(UpgradeFormatVersionUpdate(format_version=format_version)) - else: - return self + return self._apply((UpgradeFormatVersionUpdate(format_version=format_version),)) + + return self def set_properties(self, **updates: str) -> Transaction: """Set properties. @@ -229,56 +305,35 @@ def set_properties(self, **updates: str) -> Transaction: Returns: The alter table builder. 
""" - return self._append_updates(SetPropertiesUpdate(updates=updates)) + return self._apply((SetPropertiesUpdate(updates=updates),)) - def add_snapshot(self, snapshot: Snapshot) -> Transaction: - """Add a new snapshot to the table. + def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive: bool = True) -> UpdateSchema: + """Create a new UpdateSchema to alter the columns of this table. + + Args: + allow_incompatible_changes: If changes are allowed that might break downstream consumers. + case_sensitive: If field names are case-sensitive. Returns: - The transaction with the add-snapshot staged. + A new UpdateSchema. """ - self._append_updates(AddSnapshotUpdate(snapshot=snapshot)) - self._append_requirements(AssertTableUUID(uuid=self._table.metadata.table_uuid)) - - return self + return UpdateSchema(self, allow_incompatible_changes=allow_incompatible_changes, case_sensitive=case_sensitive) - def set_ref_snapshot( - self, - snapshot_id: int, - parent_snapshot_id: Optional[int], - ref_name: str, - type: str, - max_age_ref_ms: Optional[int] = None, - max_snapshot_age_ms: Optional[int] = None, - min_snapshots_to_keep: Optional[int] = None, - ) -> Transaction: - """Update a ref to a snapshot. + def update_snapshot(self) -> UpdateSnapshot: + """Create a new UpdateSnapshot to produce a new snapshot for the table. Returns: - The transaction with the set-snapshot-ref staged + A new UpdateSnapshot """ - self._append_updates( - SetSnapshotRefUpdate( - snapshot_id=snapshot_id, - parent_snapshot_id=parent_snapshot_id, - ref_name=ref_name, - type=type, - max_age_ref_ms=max_age_ref_ms, - max_snapshot_age_ms=max_snapshot_age_ms, - min_snapshots_to_keep=min_snapshots_to_keep, - ) - ) + return UpdateSnapshot(self, io=self._table.io) - self._append_requirements(AssertRefSnapshotId(snapshot_id=parent_snapshot_id, ref="main")) - return self - - def update_schema(self) -> UpdateSchema: - """Create a new UpdateSchema to alter the columns of this table. + def update_spec(self) -> UpdateSpec: + """Create a new UpdateSpec to update the partitioning of the table. Returns: - A new UpdateSchema. + A new UpdateSpec. """ - return UpdateSchema(self._table, self) + return UpdateSpec(self) def remove_properties(self, *removals: str) -> Transaction: """Remove properties. @@ -289,7 +344,7 @@ def remove_properties(self, *removals: str) -> Transaction: Returns: The alter table builder. """ - return self._append_updates(RemovePropertiesUpdate(removals=removals)) + return self._apply((RemovePropertiesUpdate(removals=removals),)) def update_location(self, location: str) -> Transaction: """Set the new table location. @@ -308,7 +363,6 @@ def commit_transaction(self) -> Table: Returns: The table with the updates applied. 
""" - # Strip the catalog name if len(self._updates) > 0: self._table._do_commit( # pylint: disable=W0212 updates=self._updates, @@ -444,6 +498,13 @@ def is_added_schema(self, schema_id: int) -> bool: update.schema_.schema_id == schema_id for update in self._updates if update.action == TableUpdateAction.add_schema ) + def is_added_sort_order(self, sort_order_id: int) -> bool: + return any( + update.sort_order.order_id == sort_order_id + for update in self._updates + if update.action == TableUpdateAction.add_sort_order + ) + @singledispatch def _apply_table_update(update: TableUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata: @@ -536,6 +597,43 @@ def _(update: SetCurrentSchemaUpdate, base_metadata: TableMetadata, context: _Ta return base_metadata.model_copy(update={"current_schema_id": new_schema_id}) +@_apply_table_update.register(AddPartitionSpecUpdate) +def _(update: AddPartitionSpecUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata: + for spec in base_metadata.partition_specs: + if spec.spec_id == update.spec.spec_id: + raise ValueError(f"Partition spec with id {spec.spec_id} already exists: {spec}") + context.add_update(update) + return base_metadata.model_copy( + update={ + "partition_specs": base_metadata.partition_specs + [update.spec], + "last_partition_id": max( + max(field.field_id for field in update.spec.fields), + base_metadata.last_partition_id or PARTITION_FIELD_ID_START - 1, + ), + } + ) + + +@_apply_table_update.register(SetDefaultSpecUpdate) +def _(update: SetDefaultSpecUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata: + new_spec_id = update.spec_id + if new_spec_id == -1: + new_spec_id = max(spec.spec_id for spec in base_metadata.partition_specs) + if new_spec_id == base_metadata.default_spec_id: + return base_metadata + found_spec_id = False + for spec in base_metadata.partition_specs: + found_spec_id = spec.spec_id == new_spec_id + if found_spec_id: + break + + if not found_spec_id: + raise ValueError(f"Failed to find spec with id {new_spec_id}") + + context.add_update(update) + return base_metadata.model_copy(update={"default_spec_id": new_spec_id}) + + @_apply_table_update.register(AddSnapshotUpdate) def _(update: AddSnapshotUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata: if len(base_metadata.schemas) == 0: @@ -606,6 +704,36 @@ def _(update: SetSnapshotRefUpdate, base_metadata: TableMetadata, context: _Tabl return base_metadata.model_copy(update=metadata_updates) +@_apply_table_update.register(AddSortOrderUpdate) +def _(update: AddSortOrderUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata: + context.add_update(update) + return base_metadata.model_copy( + update={ + "sort_orders": base_metadata.sort_orders + [update.sort_order], + } + ) + + +@_apply_table_update.register(SetDefaultSortOrderUpdate) +def _(update: SetDefaultSortOrderUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata: + new_sort_order_id = update.sort_order_id + if new_sort_order_id == -1: + # The last added sort order should be in base_metadata.sort_orders at this point + new_sort_order_id = max(sort_order.order_id for sort_order in base_metadata.sort_orders) + if not context.is_added_sort_order(new_sort_order_id): + raise ValueError("Cannot set current sort order to the last added one when no sort order has been added") + + if new_sort_order_id 
== base_metadata.default_sort_order_id: + return base_metadata + + sort_order = base_metadata.sort_order_by_id(new_sort_order_id) + if sort_order is None: + raise ValueError(f"Sort order with id {new_sort_order_id} does not exist") + + context.add_update(update) + return base_metadata.model_copy(update={"default_sort_order_id": new_sort_order_id}) + + def update_table_metadata(base_metadata: TableMetadata, updates: Tuple[TableUpdate, ...]) -> TableMetadata: """Update the table metadata with the given updates in one transaction. @@ -723,7 +851,7 @@ class AssertLastAssignedPartitionId(TableRequirement): """The table's last assigned partition id must match the requirement's `last-assigned-partition-id`.""" type: Literal["assert-last-assigned-partition-id"] = Field(default="assert-last-assigned-partition-id") - last_assigned_partition_id: int = Field(..., alias="last-assigned-partition-id") + last_assigned_partition_id: Optional[int] = Field(..., alias="last-assigned-partition-id") def validate(self, base_metadata: Optional[TableMetadata]) -> None: if base_metadata is None: @@ -764,6 +892,9 @@ def validate(self, base_metadata: Optional[TableMetadata]) -> None: ) +UpdatesAndRequirements = Tuple[Tuple[TableUpdate, ...], Tuple[TableRequirement, ...]] + + class Namespace(IcebergRootModel[List[str]]): """Reference to one or more levels of a namespace.""" @@ -808,6 +939,11 @@ def __init__( self.catalog = catalog def transaction(self) -> Transaction: + """Create a new transaction object to first stage the changes, and then commit them to the catalog. + + Returns: + The transaction object + """ return Transaction(self) def refresh(self) -> Table: @@ -871,6 +1007,12 @@ def sort_orders(self) -> Dict[int, SortOrder]: """Return a dict of the sort orders of this table.""" return {sort_order.order_id: sort_order for sort_order in self.metadata.sort_orders} + def last_partition_id(self) -> int: + """Return the highest assigned partition field ID across all specs or 999 if only the unpartitioned spec exists.""" + if self.metadata.last_partition_id: + return self.metadata.last_partition_id + return PARTITION_FIELD_ID_START - 1 + @property def properties(self) -> Dict[str, str]: """Properties of the table.""" @@ -884,17 +1026,6 @@ def location(self) -> str: def last_sequence_number(self) -> int: return self.metadata.last_sequence_number - def next_sequence_number(self) -> int: - return self.last_sequence_number + 1 if self.metadata.format_version > 1 else INITIAL_SEQUENCE_NUMBER - - def new_snapshot_id(self) -> int: - """Generate a new snapshot-id that's not in use.""" - snapshot_id = _generate_snapshot_id() - while self.snapshot_by_id(snapshot_id) is not None: - snapshot_id = _generate_snapshot_id() - - return snapshot_id - def current_snapshot(self) -> Optional[Snapshot]: """Get the current snapshot for this table, or None if there is no current snapshot.""" if self.metadata.current_snapshot_id is not None: @@ -916,18 +1047,32 @@ def history(self) -> List[SnapshotLogEntry]: return self.metadata.snapshot_log def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive: bool = True) -> UpdateSchema: - return UpdateSchema(self, allow_incompatible_changes=allow_incompatible_changes, case_sensitive=case_sensitive) + """Create a new UpdateSchema to alter the columns of this table. + + Args: + allow_incompatible_changes: If changes are allowed that might break downstream consumers. + case_sensitive: If field names are case-sensitive. 
- def name_mapping(self) -> NameMapping: + Returns: + A new UpdateSchema. + """ + return UpdateSchema( + transaction=Transaction(self, autocommit=True), + allow_incompatible_changes=allow_incompatible_changes, + case_sensitive=case_sensitive, + name_mapping=self.name_mapping(), + ) + + def name_mapping(self) -> Optional[NameMapping]: """Return the table's field-id NameMapping.""" - if name_mapping_json := self.properties.get(SCHEMA_NAME_MAPPING_DEFAULT): + if name_mapping_json := self.properties.get(TableProperties.DEFAULT_NAME_MAPPING): return parse_mapping_from_json(name_mapping_json) else: - return create_mapping_from_schema(self.schema()) + return None def append(self, df: pa.Table) -> None: """ - Append data to the table. + Shorthand API for appending a PyArrow table to the table. Args: df: The Arrow dataframe that will be appended to overwrite the table @@ -943,19 +1088,21 @@ def append(self, df: pa.Table) -> None: if len(self.spec().fields) > 0: raise ValueError("Cannot write to partitioned tables") - if len(self.sort_order().fields) > 0: - raise ValueError("Cannot write to tables with a sort-order") - - data_files = _dataframe_to_data_files(self, df=df) - merge = _MergingSnapshotProducer(operation=Operation.APPEND, table=self) - for data_file in data_files: - merge.append_data_file(data_file) + _check_schema(self.schema(), other_schema=df.schema) - merge.commit() + with self.transaction() as txn: + with txn.update_snapshot().fast_append() as update_snapshot: + # skip writing data files if the dataframe is empty + if df.shape[0] > 0: + data_files = _dataframe_to_data_files( + table_metadata=self.metadata, write_uuid=update_snapshot.commit_uuid, df=df, io=self.io + ) + for data_file in data_files: + update_snapshot.append_data_file(data_file) def overwrite(self, df: pa.Table, overwrite_filter: BooleanExpression = ALWAYS_TRUE) -> None: """ - Overwrite all the data in the table. + Shorthand for overwriting the table with a PyArrow table. 
Args: df: The Arrow dataframe that will be used to overwrite the table @@ -976,19 +1123,20 @@ def overwrite(self, df: pa.Table, overwrite_filter: BooleanExpression = ALWAYS_T if len(self.spec().fields) > 0: raise ValueError("Cannot write to partitioned tables") - if len(self.sort_order().fields) > 0: - raise ValueError("Cannot write to tables with a sort-order") + _check_schema(self.schema(), other_schema=df.schema) - data_files = _dataframe_to_data_files(self, df=df) - merge = _MergingSnapshotProducer( - operation=Operation.OVERWRITE if self.current_snapshot() is not None else Operation.APPEND, - table=self, - ) - - for data_file in data_files: - merge.append_data_file(data_file) + with self.transaction() as txn: + with txn.update_snapshot().overwrite() as update_snapshot: + # skip writing data files if the dataframe is empty + if df.shape[0] > 0: + data_files = _dataframe_to_data_files( + table_metadata=self.metadata, write_uuid=update_snapshot.commit_uuid, df=df, io=self.io + ) + for data_file in data_files: + update_snapshot.append_data_file(data_file) - merge.commit() + def update_spec(self, case_sensitive: bool = True) -> UpdateSpec: + return UpdateSpec(Transaction(self, autocommit=True), case_sensitive=case_sensitive) def refs(self) -> Dict[str, SnapshotRef]: """Return the snapshot references in the table.""" @@ -1025,6 +1173,16 @@ def __repr__(self) -> str: result_str = f"{table_name}(\n {schema_str}\n),\n{partition_str},\n{sort_order_str},\n{snapshot_str}" return result_str + def to_daft(self) -> daft.DataFrame: + """Read a Daft DataFrame lazily from this Iceberg table. + + Returns: + daft.DataFrame: Unmaterialized Daft Dataframe created from the Iceberg table + """ + import daft + + return daft.read_iceberg(self) + class StaticTable(Table): """Load a table directly from a metadata file (i.e., without using a catalog).""" @@ -1397,8 +1555,31 @@ class Move: other_field_id: Optional[int] = None -class UpdateSchema: - _table: Optional[Table] +U = TypeVar('U') + + +class UpdateTableMetadata(ABC, Generic[U]): + _transaction: Transaction + + def __init__(self, transaction: Transaction) -> None: + self._transaction = transaction + + @abstractmethod + def _commit(self) -> UpdatesAndRequirements: ... 
+ + def commit(self) -> None: + self._transaction._apply(*self._commit()) + + def __exit__(self, _: Any, value: Any, traceback: Any) -> None: + """Close and commit the change.""" + self.commit() + + def __enter__(self) -> U: + """Update the table.""" + return self # type: ignore + + +class UpdateSchema(UpdateTableMetadata["UpdateSchema"]): _schema: Schema _last_column_id: itertools.count[int] _identifier_field_names: Set[str] @@ -1413,27 +1594,25 @@ class UpdateSchema: _id_to_parent: Dict[int, str] = {} _allow_incompatible_changes: bool _case_sensitive: bool - _transaction: Optional[Transaction] def __init__( self, - table: Optional[Table], - transaction: Optional[Transaction] = None, + transaction: Transaction, allow_incompatible_changes: bool = False, case_sensitive: bool = True, schema: Optional[Schema] = None, + name_mapping: Optional[NameMapping] = None, ) -> None: - self._table = table + super().__init__(transaction) if isinstance(schema, Schema): self._schema = schema self._last_column_id = itertools.count(1 + schema.highest_field_id) - elif table is not None: - self._schema = table.schema() - self._last_column_id = itertools.count(1 + table.metadata.last_column_id) else: - raise ValueError("Either provide a table or a schema") + self._schema = self._transaction.table_metadata.schema() + self._last_column_id = itertools.count(1 + self._transaction.table_metadata.last_column_id) + self._name_mapping = name_mapping self._identifier_field_names = self._schema.identifier_field_names() self._adds = {} @@ -1457,14 +1636,6 @@ def get_column_name(field_id: int) -> str: self._case_sensitive = case_sensitive self._transaction = transaction - def __exit__(self, _: Any, value: Any, traceback: Any) -> None: - """Close and commit the change.""" - return self.commit() - - def __enter__(self) -> UpdateSchema: - """Update the table.""" - return self - def case_sensitive(self, case_sensitive: bool) -> UpdateSchema: """Determine if the case of schema needs to be considered when comparing column names. 
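
The new `UpdateTableMetadata` base class above is what lets `UpdateSchema` (and, further below, the snapshot and spec updaters) be used as context managers: a subclass implements `_commit()` returning updates and requirements, and leaving the `with` block hands them to the transaction through `_apply`. A toy, self-contained sketch of that pattern (the class and method names below are invented for illustration):

```python
from abc import ABC, abstractmethod
from typing import List, Tuple


class PendingUpdate(ABC):
    """Toy stand-in for the UpdateTableMetadata pattern above (illustrative only)."""

    def __init__(self, staged: List[str]) -> None:
        self._staged = staged  # stands in for the owning Transaction

    @abstractmethod
    def _commit(self) -> Tuple[str, ...]:
        """Build the changes to stage; the real method returns updates and requirements."""

    def commit(self) -> None:
        self._staged.extend(self._commit())

    def __enter__(self) -> "PendingUpdate":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        # Leaving the `with` block commits automatically, mirroring __exit__ above.
        self.commit()


class RenameColumn(PendingUpdate):
    def __init__(self, staged: List[str], old: str, new: str) -> None:
        super().__init__(staged)
        self._old, self._new = old, new

    def _commit(self) -> Tuple[str, ...]:
        return (f"rename {self._old} -> {self._new}",)


staged: List[str] = []
with RenameColumn(staged, "ts", "event_ts"):
    pass  # the change is staged on exit
print(staged)  # ['rename ts -> event_ts']
```

Combined with the `autocommit=True` transaction that `Table.update_schema` now creates, this is what makes the `with ... as update:` form commit as soon as the block exits.
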
@@ -1477,9 +1648,11 @@ def case_sensitive(self, case_sensitive: bool) -> UpdateSchema: self._case_sensitive = case_sensitive return self - def union_by_name(self, new_schema: Schema) -> UpdateSchema: + def union_by_name(self, new_schema: Union[Schema, "pa.Schema"]) -> UpdateSchema: + from pyiceberg.catalog import Catalog + visit_with_partner( - new_schema, + Catalog._convert_schema_if_needed(new_schema), -1, UnionByNameVisitor(update_schema=self, existing_schema=self._schema, case_sensitive=self._case_sensitive), # type: ignore PartnerIdByNameAccessor(partner_schema=self._schema, case_sensitive=self._case_sensitive), @@ -1621,7 +1794,7 @@ def rename_column(self, path_from: Union[str, Tuple[str, ...]], new_name: str) - from_field_correct_casing = self._schema.find_column_name(field_from.field_id) if from_field_correct_casing in self._identifier_field_names: self._identifier_field_names.remove(from_field_correct_casing) - new_identifier_path = f"{from_field_correct_casing[:-len(field_from.name)]}{new_name}" + new_identifier_path = f"{from_field_correct_casing[: -len(field_from.name)]}{new_name}" self._identifier_field_names.add(new_identifier_path) return self @@ -1851,32 +2024,36 @@ def move_after(self, path: Union[str, Tuple[str, ...]], after_name: Union[str, T return self - def commit(self) -> None: + def _commit(self) -> UpdatesAndRequirements: """Apply the pending changes and commit.""" - if self._table is None: - raise ValueError("Requires a table to commit to") - new_schema = self._apply() - existing_schema_id = next((schema.schema_id for schema in self._table.metadata.schemas if schema == new_schema), None) + existing_schema_id = next( + (schema.schema_id for schema in self._transaction.table_metadata.schemas if schema == new_schema), None + ) + + requirements: Tuple[TableRequirement, ...] = () + updates: Tuple[TableUpdate, ...] = () # Check if it is different current schema ID - if existing_schema_id != self._table.schema().schema_id: - requirements = (AssertCurrentSchemaId(current_schema_id=self._schema.schema_id),) + if existing_schema_id != self._schema.schema_id: + requirements += (AssertCurrentSchemaId(current_schema_id=self._schema.schema_id),) if existing_schema_id is None: - last_column_id = max(self._table.metadata.last_column_id, new_schema.highest_field_id) - updates = ( + last_column_id = max(self._transaction.table_metadata.last_column_id, new_schema.highest_field_id) + updates += ( AddSchemaUpdate(schema=new_schema, last_column_id=last_column_id), SetCurrentSchemaUpdate(schema_id=-1), ) else: - updates = (SetCurrentSchemaUpdate(schema_id=existing_schema_id),) # type: ignore + updates += (SetCurrentSchemaUpdate(schema_id=existing_schema_id),) - if self._transaction is not None: - self._transaction._append_updates(*updates) # pylint: disable=W0212 - self._transaction._append_requirements(*requirements) # pylint: disable=W0212 - else: - self._table._do_commit(updates=updates, requirements=requirements) # pylint: disable=W0212 + if name_mapping := self._name_mapping: + updated_name_mapping = update_mapping(name_mapping, self._updates, self._adds) + updates += ( + SetPropertiesUpdate(updates={TableProperties.DEFAULT_NAME_MAPPING: updated_name_mapping.model_dump_json()}), + ) + + return updates, requirements def _apply(self) -> Schema: """Apply the pending changes to the original schema and returns the result. 
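
Two user-visible effects of the `UpdateSchema` changes above: `union_by_name` now also accepts a PyArrow schema (converted through `Catalog._convert_schema_if_needed`), and `_commit` refreshes the `schema.name-mapping.default` property when the table carries a name mapping. Assuming a previously loaded table and the `pyarrow` extra, usage along these lines should be possible; the column names are made up for illustration:

```python
import pyarrow as pa

# `table` is assumed to be an already-loaded Iceberg table.
incoming = pa.schema([
    ("id", pa.int64()),
    ("event_ts", pa.timestamp("us")),
    ("comment", pa.string()),  # a column the table does not have yet
])

with table.update_schema() as update:
    # Adds any columns from `incoming` that are missing from the table schema.
    update.union_by_name(incoming)
```
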
@@ -1902,7 +2079,13 @@ def _apply(self) -> Schema: field_ids.add(field.field_id) - next_schema_id = 1 + (max(self._table.schemas().keys()) if self._table is not None else self._schema.schema_id) + if txn := self._transaction: + next_schema_id = 1 + ( + max(schema.schema_id for schema in txn.table_metadata.schemas) if txn.table_metadata is not None else 0 + ) + else: + next_schema_id = 0 + return Schema(*struct.fields, schema_id=next_schema_id, identifier_field_ids=field_ids) def assign_new_column_id(self) -> int: @@ -2232,20 +2415,6 @@ def _add_and_move_fields( return None if len(adds) == 0 else tuple(*fields, *adds) -def _generate_snapshot_id() -> int: - """Generate a new Snapshot ID from a UUID. - - Returns: An 64 bit long - """ - rnd_uuid = uuid.uuid4() - snapshot_id = int.from_bytes( - bytes(lhs ^ rhs for lhs, rhs in zip(rnd_uuid.bytes[0:8], rnd_uuid.bytes[8:16])), byteorder='little', signed=True - ) - snapshot_id = snapshot_id if snapshot_id >= 0 else snapshot_id * -1 - - return snapshot_id - - @dataclass(frozen=True) class WriteTask: write_uuid: uuid.UUID @@ -2271,89 +2440,73 @@ def _generate_manifest_list_path(location: str, snapshot_id: int, attempt: int, return f'{location}/metadata/snap-{snapshot_id}-{attempt}-{commit_uuid}.avro' -def _dataframe_to_data_files(table: Table, df: pa.Table) -> Iterable[DataFile]: +def _dataframe_to_data_files( + table_metadata: TableMetadata, df: pa.Table, io: FileIO, write_uuid: Optional[uuid.UUID] = None +) -> Iterable[DataFile]: + """Convert a PyArrow table into a DataFile. + + Returns: + An iterable that supplies datafiles that represent the table. + """ from pyiceberg.io.pyarrow import write_file - if len(table.spec().fields) > 0: + if len([spec for spec in table_metadata.partition_specs if spec.spec_id != 0]) > 0: raise ValueError("Cannot write to partitioned tables") - if len(table.sort_order().fields) > 0: - raise ValueError("Cannot write to tables with a sort-order") - - write_uuid = uuid.uuid4() counter = itertools.count(0) + write_uuid = write_uuid or uuid.uuid4() # This is an iter, so we don't have to materialize everything every time # This will be more relevant when we start doing partitioned writes - yield from write_file(table, iter([WriteTask(write_uuid, next(counter), df)])) + yield from write_file(io=io, table_metadata=table_metadata, tasks=iter([WriteTask(write_uuid, next(counter), df)])) -class _MergingSnapshotProducer: +class _MergingSnapshotProducer(UpdateTableMetadata["_MergingSnapshotProducer"]): + commit_uuid: uuid.UUID _operation: Operation - _table: Table _snapshot_id: int _parent_snapshot_id: Optional[int] _added_data_files: List[DataFile] - _commit_uuid: uuid.UUID - def __init__(self, operation: Operation, table: Table) -> None: + def __init__( + self, + operation: Operation, + transaction: Transaction, + io: FileIO, + commit_uuid: Optional[uuid.UUID] = None, + ) -> None: + super().__init__(transaction) + self.commit_uuid = commit_uuid or uuid.uuid4() + self._io = io self._operation = operation - self._table = table - self._snapshot_id = table.new_snapshot_id() + self._snapshot_id = self._transaction.table_metadata.new_snapshot_id() # Since we only support the main branch for now - self._parent_snapshot_id = snapshot.snapshot_id if (snapshot := self._table.current_snapshot()) else None + self._parent_snapshot_id = ( + snapshot.snapshot_id if (snapshot := self._transaction.table_metadata.current_snapshot()) else None + ) self._added_data_files = [] - self._commit_uuid = uuid.uuid4() def append_data_file(self, data_file: 
DataFile) -> _MergingSnapshotProducer: self._added_data_files.append(data_file) return self - def _deleted_entries(self) -> List[ManifestEntry]: - """To determine if we need to record any deleted entries. - - With partial overwrites we have to use the predicate to evaluate - which entries are affected. - """ - if self._operation == Operation.OVERWRITE: - if self._parent_snapshot_id is not None: - previous_snapshot = self._table.snapshot_by_id(self._parent_snapshot_id) - if previous_snapshot is None: - # This should never happen since you cannot overwrite an empty table - raise ValueError(f"Could not find the previous snapshot: {self._parent_snapshot_id}") - - executor = ExecutorFactory.get_or_create() - - def _get_entries(manifest: ManifestFile) -> List[ManifestEntry]: - return [ - ManifestEntry( - status=ManifestEntryStatus.DELETED, - snapshot_id=entry.snapshot_id, - data_sequence_number=entry.data_sequence_number, - file_sequence_number=entry.file_sequence_number, - data_file=entry.data_file, - ) - for entry in manifest.fetch_manifest_entry(self._table.io, discard_deleted=True) - if entry.data_file.content == DataFileContent.DATA - ] + @abstractmethod + def _deleted_entries(self) -> List[ManifestEntry]: ... - list_of_entries = executor.map(_get_entries, previous_snapshot.manifests(self._table.io)) - return list(chain(*list_of_entries)) - return [] - elif self._operation == Operation.APPEND: - return [] - else: - raise ValueError(f"Not implemented for: {self._operation}") + @abstractmethod + def _existing_manifests(self) -> List[ManifestFile]: ... def _manifests(self) -> List[ManifestFile]: def _write_added_manifest() -> List[ManifestFile]: if self._added_data_files: - output_file_location = _new_manifest_path(location=self._table.location(), num=0, commit_uuid=self._commit_uuid) + output_file_location = _new_manifest_path( + location=self._transaction.table_metadata.location, num=0, commit_uuid=self.commit_uuid + ) with write_manifest( - format_version=self._table.format_version, - spec=self._table.spec(), - schema=self._table.schema(), - output_file=self._table.io.new_output(output_file_location), + format_version=self._transaction.table_metadata.format_version, + spec=self._transaction.table_metadata.spec(), + schema=self._transaction.table_metadata.schema(), + output_file=self._io.new_output(output_file_location), snapshot_id=self._snapshot_id, ) as writer: for data_file in self._added_data_files: @@ -2373,13 +2526,16 @@ def _write_added_manifest() -> List[ManifestFile]: def _write_delete_manifest() -> List[ManifestFile]: # Check if we need to mark the files as deleted deleted_entries = self._deleted_entries() - if deleted_entries: - output_file_location = _new_manifest_path(location=self._table.location(), num=1, commit_uuid=self._commit_uuid) + if len(deleted_entries) > 0: + output_file_location = _new_manifest_path( + location=self._transaction.table_metadata.location, num=1, commit_uuid=self.commit_uuid + ) + with write_manifest( - format_version=self._table.format_version, - spec=self._table.spec(), - schema=self._table.schema(), - output_file=self._table.io.new_output(output_file_location), + format_version=self._transaction.table_metadata.format_version, + spec=self._transaction.table_metadata.spec(), + schema=self._transaction.table_metadata.schema(), + output_file=self._io.new_output(output_file_location), snapshot_id=self._snapshot_id, ) as writer: for delete_entry in deleted_entries: @@ -2388,32 +2544,11 @@ def _write_delete_manifest() -> List[ManifestFile]: else: return [] 
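
The snapshot producer above writes one manifest for the newly added files and, when needed, a second manifest carrying the `DELETED` entries; as the continuation below shows, both writers are submitted to an executor together with the operation-specific `_existing_manifests`. A toy sketch of that fan-out (helpers and file names invented for illustration):

```python
from concurrent.futures import ThreadPoolExecutor
from typing import List


def write_added_manifest() -> List[str]:
    # Stand-in for writing a manifest that lists newly added data files.
    return ["manifest-added.avro"]


def write_delete_manifest() -> List[str]:
    # Stand-in for writing a manifest that marks entries as DELETED.
    return []  # nothing to delete for a plain append


def existing_manifests() -> List[str]:
    # Stand-in for the operation-specific carry-over of prior manifests.
    return ["manifest-0.avro", "manifest-1.avro"]


with ThreadPoolExecutor() as executor:
    added = executor.submit(write_added_manifest)
    deletes = executor.submit(write_delete_manifest)
    existing = executor.submit(existing_manifests)
    manifest_list = added.result() + deletes.result() + existing.result()

print(manifest_list)
```
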
- def _fetch_existing_manifests() -> List[ManifestFile]: - existing_manifests = [] - - # Add existing manifests - if self._operation == Operation.APPEND and self._parent_snapshot_id is not None: - # In case we want to append, just add the existing manifests - previous_snapshot = self._table.snapshot_by_id(self._parent_snapshot_id) - - if previous_snapshot is None: - raise ValueError(f"Snapshot could not be found: {self._parent_snapshot_id}") - - for manifest in previous_snapshot.manifests(io=self._table.io): - if ( - manifest.has_added_files() - or manifest.has_existing_files() - or manifest.added_snapshot_id == self._snapshot_id - ): - existing_manifests.append(manifest) - - return existing_manifests - executor = ExecutorFactory.get_or_create() added_manifests = executor.submit(_write_added_manifest) delete_manifests = executor.submit(_write_delete_manifest) - existing_manifests = executor.submit(_fetch_existing_manifests) + existing_manifests = executor.submit(self._existing_manifests) return added_manifests.result() + delete_manifests.result() + existing_manifests.result() @@ -2423,7 +2558,11 @@ def _summary(self) -> Summary: for data_file in self._added_data_files: ssc.add_file(data_file=data_file) - previous_snapshot = self._table.snapshot_by_id(self._parent_snapshot_id) if self._parent_snapshot_id is not None else None + previous_snapshot = ( + self._transaction.table_metadata.snapshot_by_id(self._parent_snapshot_id) + if self._parent_snapshot_id is not None + else None + ) return update_snapshot_summaries( summary=Summary(operation=self._operation, **ssc.build()), @@ -2431,18 +2570,21 @@ def _summary(self) -> Summary: truncate_full_table=self._operation == Operation.OVERWRITE, ) - def commit(self) -> Snapshot: + def _commit(self) -> UpdatesAndRequirements: new_manifests = self._manifests() - next_sequence_number = self._table.next_sequence_number() + next_sequence_number = self._transaction.table_metadata.next_sequence_number() summary = self._summary() manifest_list_file_path = _generate_manifest_list_path( - location=self._table.location(), snapshot_id=self._snapshot_id, attempt=0, commit_uuid=self._commit_uuid + location=self._transaction.table_metadata.location, + snapshot_id=self._snapshot_id, + attempt=0, + commit_uuid=self.commit_uuid, ) with write_manifest_list( - format_version=self._table.metadata.format_version, - output_file=self._table.io.new_output(manifest_list_file_path), + format_version=self._transaction.table_metadata.format_version, + output_file=self._io.new_output(manifest_list_file_path), snapshot_id=self._snapshot_id, parent_snapshot_id=self._parent_snapshot_id, sequence_number=next_sequence_number, @@ -2455,13 +2597,354 @@ def commit(self) -> Snapshot: manifest_list=manifest_list_file_path, sequence_number=next_sequence_number, summary=summary, - schema_id=self._table.schema().schema_id, + schema_id=self._transaction.table_metadata.current_schema_id, ) - with self._table.transaction() as tx: - tx.add_snapshot(snapshot=snapshot) - tx.set_ref_snapshot( - snapshot_id=self._snapshot_id, parent_snapshot_id=self._parent_snapshot_id, ref_name="main", type="branch" - ) + return ( + ( + AddSnapshotUpdate(snapshot=snapshot), + SetSnapshotRefUpdate( + snapshot_id=self._snapshot_id, parent_snapshot_id=self._parent_snapshot_id, ref_name="main", type="branch" + ), + ), + ( + AssertTableUUID(uuid=self._transaction.table_metadata.table_uuid), + AssertRefSnapshotId(snapshot_id=self._parent_snapshot_id, ref="main"), + ), + ) - return snapshot + +class 
FastAppendFiles(_MergingSnapshotProducer): + def _existing_manifests(self) -> List[ManifestFile]: + """To determine if there are any existing manifest files. + + A fast append will add another ManifestFile to the ManifestList. + All the existing manifest files are considered existing. + """ + existing_manifests = [] + + if self._parent_snapshot_id is not None: + previous_snapshot = self._transaction.table_metadata.snapshot_by_id(self._parent_snapshot_id) + + if previous_snapshot is None: + raise ValueError(f"Snapshot could not be found: {self._parent_snapshot_id}") + + for manifest in previous_snapshot.manifests(io=self._io): + if manifest.has_added_files() or manifest.has_existing_files() or manifest.added_snapshot_id == self._snapshot_id: + existing_manifests.append(manifest) + + return existing_manifests + + def _deleted_entries(self) -> List[ManifestEntry]: + """To determine if we need to record any deleted manifest entries. + + In case of an append, nothing is deleted. + """ + return [] + + +class OverwriteFiles(_MergingSnapshotProducer): + def _existing_manifests(self) -> List[ManifestFile]: + """To determine if there are any existing manifest files. + + In the of a full overwrite, all the previous manifests are + considered deleted. + """ + return [] + + def _deleted_entries(self) -> List[ManifestEntry]: + """To determine if we need to record any deleted entries. + + With a full overwrite all the entries are considered deleted. + With partial overwrites we have to use the predicate to evaluate + which entries are affected. + """ + if self._parent_snapshot_id is not None: + previous_snapshot = self._transaction.table_metadata.snapshot_by_id(self._parent_snapshot_id) + if previous_snapshot is None: + # This should never happen since you cannot overwrite an empty table + raise ValueError(f"Could not find the previous snapshot: {self._parent_snapshot_id}") + + executor = ExecutorFactory.get_or_create() + + def _get_entries(manifest: ManifestFile) -> List[ManifestEntry]: + return [ + ManifestEntry( + status=ManifestEntryStatus.DELETED, + snapshot_id=entry.snapshot_id, + data_sequence_number=entry.data_sequence_number, + file_sequence_number=entry.file_sequence_number, + data_file=entry.data_file, + ) + for entry in manifest.fetch_manifest_entry(self._io, discard_deleted=True) + if entry.data_file.content == DataFileContent.DATA + ] + + list_of_entries = executor.map(_get_entries, previous_snapshot.manifests(self._io)) + return list(chain(*list_of_entries)) + else: + return [] + + +class UpdateSnapshot: + _transaction: Transaction + _io: FileIO + + def __init__(self, transaction: Transaction, io: FileIO) -> None: + self._transaction = transaction + self._io = io + + def fast_append(self) -> FastAppendFiles: + return FastAppendFiles(operation=Operation.APPEND, transaction=self._transaction, io=self._io) + + def overwrite(self) -> OverwriteFiles: + return OverwriteFiles( + operation=Operation.OVERWRITE + if self._transaction.table_metadata.current_snapshot() is not None + else Operation.APPEND, + transaction=self._transaction, + io=self._io, + ) + + +class UpdateSpec(UpdateTableMetadata["UpdateSpec"]): + _transaction: Transaction + _name_to_field: Dict[str, PartitionField] = {} + _name_to_added_field: Dict[str, PartitionField] = {} + _transform_to_field: Dict[Tuple[int, str], PartitionField] = {} + _transform_to_added_field: Dict[Tuple[int, str], PartitionField] = {} + _renames: Dict[str, str] = {} + _added_time_fields: Dict[int, PartitionField] = {} + _case_sensitive: bool + _adds: 
List[PartitionField] + _deletes: Set[int] + _last_assigned_partition_id: int + + def __init__(self, transaction: Transaction, case_sensitive: bool = True) -> None: + super().__init__(transaction) + self._name_to_field = {field.name: field for field in transaction.table_metadata.spec().fields} + self._name_to_added_field = {} + self._transform_to_field = { + (field.source_id, repr(field.transform)): field for field in transaction.table_metadata.spec().fields + } + self._transform_to_added_field = {} + self._adds = [] + self._deletes = set() + self._last_assigned_partition_id = transaction.table_metadata.last_partition_id or PARTITION_FIELD_ID_START - 1 + self._renames = {} + self._transaction = transaction + self._case_sensitive = case_sensitive + self._added_time_fields = {} + + def add_field( + self, + source_column_name: str, + transform: Transform[Any, Any], + partition_field_name: Optional[str] = None, + ) -> UpdateSpec: + ref = Reference(source_column_name) + bound_ref = ref.bind(self._transaction.table_metadata.schema(), self._case_sensitive) + # verify transform can actually bind it + output_type = bound_ref.field.field_type + if not transform.can_transform(output_type): + raise ValueError(f"{transform} cannot transform {output_type} values from {bound_ref.field.name}") + + transform_key = (bound_ref.field.field_id, repr(transform)) + existing_partition_field = self._transform_to_field.get(transform_key) + if existing_partition_field and self._is_duplicate_partition(transform, existing_partition_field): + raise ValueError(f"Duplicate partition field for ${ref.name}=${ref}, ${existing_partition_field} already exists") + + added = self._transform_to_added_field.get(transform_key) + if added: + raise ValueError(f"Already added partition: {added.name}") + + new_field = self._partition_field((bound_ref.field.field_id, transform), partition_field_name) + if new_field.name in self._name_to_added_field: + raise ValueError(f"Already added partition field with name: {new_field.name}") + + if isinstance(new_field.transform, TimeTransform): + existing_time_field = self._added_time_fields.get(new_field.source_id) + if existing_time_field: + raise ValueError(f"Cannot add time partition field: {new_field.name} conflicts with {existing_time_field.name}") + self._added_time_fields[new_field.source_id] = new_field + self._transform_to_added_field[transform_key] = new_field + + existing_partition_field = self._name_to_field.get(new_field.name) + if existing_partition_field and new_field.field_id not in self._deletes: + if isinstance(existing_partition_field.transform, VoidTransform): + self.rename_field( + existing_partition_field.name, existing_partition_field.name + "_" + str(existing_partition_field.field_id) + ) + else: + raise ValueError(f"Cannot add duplicate partition field name: {existing_partition_field.name}") + + self._name_to_added_field[new_field.name] = new_field + self._adds.append(new_field) + return self + + def add_identity(self, source_column_name: str) -> UpdateSpec: + return self.add_field(source_column_name, IdentityTransform(), None) + + def remove_field(self, name: str) -> UpdateSpec: + added = self._name_to_added_field.get(name) + if added: + raise ValueError(f"Cannot delete newly added field {name}") + renamed = self._renames.get(name) + if renamed: + raise ValueError(f"Cannot rename and delete field {name}") + field = self._name_to_field.get(name) + if not field: + raise ValueError(f"No such partition field: {name}") + + self._deletes.add(field.field_id) + return self + + 
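
`add_field` above binds the source column and then calls `transform.can_transform(...)` before staging a new partition field, so an incompatible column/transform combination fails early. A small sketch of that pre-check, assuming the transform and type classes behave as the calls in the diff suggest (the printed expectations are hedged, not guaranteed):

```python
from pyiceberg.transforms import BucketTransform, DayTransform
from pyiceberg.types import StringType, TimestampType

# The same compatibility check add_field performs before staging a field.
print(BucketTransform(16).can_transform(StringType()))   # expected: True
print(DayTransform().can_transform(TimestampType()))     # expected: True
print(DayTransform().can_transform(StringType()))        # expected: False -> add_field would raise ValueError
```
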
def rename_field(self, name: str, new_name: str) -> UpdateSpec: + existing_field = self._name_to_field.get(new_name) + if existing_field and isinstance(existing_field.transform, VoidTransform): + return self.rename_field(name, name + "_" + str(existing_field.field_id)) + added = self._name_to_added_field.get(name) + if added: + raise ValueError("Cannot rename recently added partitions") + field = self._name_to_field.get(name) + if not field: + raise ValueError(f"Cannot find partition field {name}") + if field.field_id in self._deletes: + raise ValueError(f"Cannot delete and rename partition field {name}") + self._renames[name] = new_name + return self + + def _commit(self) -> UpdatesAndRequirements: + new_spec = self._apply() + updates: Tuple[TableUpdate, ...] = () + requirements: Tuple[TableRequirement, ...] = () + + if self._transaction.table_metadata.default_spec_id != new_spec.spec_id: + if new_spec.spec_id not in self._transaction.table_metadata.specs(): + updates = ( + AddPartitionSpecUpdate(spec=new_spec), + SetDefaultSpecUpdate(spec_id=-1), + ) + else: + updates = (SetDefaultSpecUpdate(spec_id=new_spec.spec_id),) + + required_last_assigned_partitioned_id = self._transaction.table_metadata.last_partition_id + requirements = (AssertLastAssignedPartitionId(last_assigned_partition_id=required_last_assigned_partitioned_id),) + + return updates, requirements + + def _apply(self) -> PartitionSpec: + def _check_and_add_partition_name(schema: Schema, name: str, source_id: int, partition_names: Set[str]) -> None: + try: + field = schema.find_field(name) + except ValueError: + field = None + + if source_id is not None and field is not None and field.field_id != source_id: + raise ValueError(f"Cannot create identity partition from a different field in the schema {name}") + elif field is not None and source_id != field.field_id: + raise ValueError(f"Cannot create partition from name that exists in schema {name}") + if not name: + raise ValueError("Undefined name") + if name in partition_names: + raise ValueError(f"Partition name has to be unique: {name}") + partition_names.add(name) + + def _add_new_field( + schema: Schema, source_id: int, field_id: int, name: str, transform: Transform[Any, Any], partition_names: Set[str] + ) -> PartitionField: + _check_and_add_partition_name(schema, name, source_id, partition_names) + return PartitionField(source_id, field_id, transform, name) + + partition_fields = [] + partition_names: Set[str] = set() + for field in self._transaction.table_metadata.spec().fields: + if field.field_id not in self._deletes: + renamed = self._renames.get(field.name) + if renamed: + new_field = _add_new_field( + self._transaction.table_metadata.schema(), + field.source_id, + field.field_id, + renamed, + field.transform, + partition_names, + ) + else: + new_field = _add_new_field( + self._transaction.table_metadata.schema(), + field.source_id, + field.field_id, + field.name, + field.transform, + partition_names, + ) + partition_fields.append(new_field) + elif self._transaction.table_metadata.format_version == 1: + renamed = self._renames.get(field.name) + if renamed: + new_field = _add_new_field( + self._transaction.table_metadata.schema(), + field.source_id, + field.field_id, + renamed, + VoidTransform(), + partition_names, + ) + else: + new_field = _add_new_field( + self._transaction.table_metadata.schema(), + field.source_id, + field.field_id, + field.name, + VoidTransform(), + partition_names, + ) + + partition_fields.append(new_field) + + for added_field in self._adds: + 
new_field = PartitionField( + source_id=added_field.source_id, + field_id=added_field.field_id, + transform=added_field.transform, + name=added_field.name, + ) + partition_fields.append(new_field) + + # Reuse spec id or create a new one. + new_spec = PartitionSpec(*partition_fields) + new_spec_id = INITIAL_PARTITION_SPEC_ID + for spec in self._transaction.table_metadata.specs().values(): + if new_spec.compatible_with(spec): + new_spec_id = spec.spec_id + break + elif new_spec_id <= spec.spec_id: + new_spec_id = spec.spec_id + 1 + return PartitionSpec(*partition_fields, spec_id=new_spec_id) + + def _partition_field(self, transform_key: Tuple[int, Transform[Any, Any]], name: Optional[str]) -> PartitionField: + if self._transaction.table_metadata.format_version == 2: + source_id, transform = transform_key + historical_fields = [] + for spec in self._transaction.table_metadata.specs().values(): + for field in spec.fields: + historical_fields.append((field.source_id, field.field_id, repr(field.transform), field.name)) + + for field_key in historical_fields: + if field_key[0] == source_id and field_key[2] == repr(transform): + if name is None or field_key[3] == name: + return PartitionField(source_id, field_key[1], transform, name) + + new_field_id = self._new_field_id() + if name is None: + tmp_field = PartitionField(transform_key[0], new_field_id, transform_key[1], 'unassigned_field_name') + name = _visit_partition_field(self._transaction.table_metadata.schema(), tmp_field, _PartitionNameGenerator()) + return PartitionField(transform_key[0], new_field_id, transform_key[1], name) + + def _new_field_id(self) -> int: + self._last_assigned_partition_id += 1 + return self._last_assigned_partition_id + + def _is_duplicate_partition(self, transform: Transform[Any, Any], partition_field: PartitionField) -> bool: + return partition_field.field_id not in self._deletes and partition_field.transform == transform diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py index 43e29c7b03..931b0cfe0a 100644 --- a/pyiceberg/table/metadata.py +++ b/pyiceberg/table/metadata.py @@ -28,7 +28,7 @@ Union, ) -from pydantic import Field, model_validator +from pydantic import Field, field_validator, model_validator from pydantic import ValidationError as PydanticValidationError from typing_extensions import Annotated @@ -49,6 +49,7 @@ IcebergRootModel, Properties, ) +from pyiceberg.types import transform_dict_value_to_str from pyiceberg.utils.datetime import datetime_to_millis CURRENT_SNAPSHOT_ID = "current-snapshot-id" @@ -218,6 +219,9 @@ class TableMetadataCommonFields(IcebergBaseModel): There is always a main branch reference pointing to the current-snapshot-id even if the refs map is null.""" + # validators + transform_properties_dict_value_to_str = field_validator('properties', mode='before')(transform_dict_value_to_str) + def snapshot_by_id(self, snapshot_id: int) -> Optional[Snapshot]: """Get the snapshot by snapshot_id.""" return next((snapshot for snapshot in self.snapshots if snapshot.snapshot_id == snapshot_id), None) @@ -226,6 +230,53 @@ def schema_by_id(self, schema_id: int) -> Optional[Schema]: """Get the schema by schema_id.""" return next((schema for schema in self.schemas if schema.schema_id == schema_id), None) + def schema(self) -> Schema: + """Return the schema for this table.""" + return next(schema for schema in self.schemas if schema.schema_id == self.current_schema_id) + + def spec(self) -> PartitionSpec: + """Return the partition spec of this table.""" + return next(spec for spec 
in self.partition_specs if spec.spec_id == self.default_spec_id) + + def specs(self) -> Dict[int, PartitionSpec]: + """Return a dict the partition specs this table.""" + return {spec.spec_id: spec for spec in self.partition_specs} + + def new_snapshot_id(self) -> int: + """Generate a new snapshot-id that's not in use.""" + snapshot_id = _generate_snapshot_id() + while self.snapshot_by_id(snapshot_id) is not None: + snapshot_id = _generate_snapshot_id() + + return snapshot_id + + def current_snapshot(self) -> Optional[Snapshot]: + """Get the current snapshot for this table, or None if there is no current snapshot.""" + if self.current_snapshot_id is not None: + return self.snapshot_by_id(self.current_snapshot_id) + return None + + def next_sequence_number(self) -> int: + return self.last_sequence_number + 1 if self.format_version > 1 else INITIAL_SEQUENCE_NUMBER + + def sort_order_by_id(self, sort_order_id: int) -> Optional[SortOrder]: + """Get the sort order by sort_order_id.""" + return next((sort_order for sort_order in self.sort_orders if sort_order.order_id == sort_order_id), None) + + +def _generate_snapshot_id() -> int: + """Generate a new Snapshot ID from a UUID. + + Returns: An 64 bit long + """ + rnd_uuid = uuid.uuid4() + snapshot_id = int.from_bytes( + bytes(lhs ^ rhs for lhs, rhs in zip(rnd_uuid.bytes[0:8], rnd_uuid.bytes[8:16])), byteorder='little', signed=True + ) + snapshot_id = snapshot_id if snapshot_id >= 0 else snapshot_id * -1 + + return snapshot_id + class TableMetadataV1(TableMetadataCommonFields, IcebergBaseModel): """Represents version 1 of the Table Metadata. @@ -260,8 +311,10 @@ def set_v2_compatible_defaults(cls, data: Dict[str, Any]) -> Dict[str, Any]: The TableMetadata with the defaults applied. """ # When the schema doesn't have an ID - if data.get("schema") and "schema_id" not in data["schema"]: - data["schema"]["schema_id"] = DEFAULT_SCHEMA_ID + schema = data.get("schema") + if isinstance(schema, dict): + if "schema_id" not in schema and "schema-id" not in schema: + schema["schema_id"] = DEFAULT_SCHEMA_ID return data @@ -308,7 +361,8 @@ def construct_partition_specs(cls, data: Dict[str, Any]) -> Dict[str, Any]: data[PARTITION_SPECS] = [{"field-id": 0, "fields": ()}] data[LAST_PARTITION_ID] = max( - [field.get(FIELD_ID) for spec in data[PARTITION_SPECS] for field in spec[FIELDS]], default=PARTITION_FIELD_ID_START + [field.get(FIELD_ID) for spec in data[PARTITION_SPECS] for field in spec[FIELDS]], + default=PARTITION_FIELD_ID_START - 1, ) return data @@ -335,7 +389,7 @@ def to_v2(self) -> TableMetadataV2: metadata["format-version"] = 2 return TableMetadataV2.model_validate(metadata) - format_version: Literal[1] = Field(alias="format-version") + format_version: Literal[1] = Field(alias="format-version", default=1) """An integer version number for the format. Currently, this can be 1 or 2 based on the spec. 
Implementations must throw an exception if a table’s version is higher than the supported version.""" @@ -404,6 +458,8 @@ def new_table_metadata( properties: Properties = EMPTY_DICT, table_uuid: Optional[uuid.UUID] = None, ) -> TableMetadata: + from pyiceberg.table import TableProperties + fresh_schema = assign_fresh_schema_ids(schema) fresh_partition_spec = assign_fresh_partition_spec_ids(partition_spec, schema, fresh_schema) fresh_sort_order = assign_fresh_sort_order_ids(sort_order, schema, fresh_schema) @@ -411,6 +467,24 @@ def new_table_metadata( if table_uuid is None: table_uuid = uuid.uuid4() + # Remove format-version so it does not get persisted + format_version = int(properties.pop(TableProperties.FORMAT_VERSION, TableProperties.DEFAULT_FORMAT_VERSION)) + if format_version == 1: + return TableMetadataV1( + location=location, + last_column_id=fresh_schema.highest_field_id, + current_schema_id=fresh_schema.schema_id, + schema=fresh_schema, + partition_spec=[field.model_dump() for field in fresh_partition_spec.fields], + partition_specs=[fresh_partition_spec], + default_spec_id=fresh_partition_spec.spec_id, + sort_orders=[fresh_sort_order], + default_sort_order_id=fresh_sort_order.order_id, + properties=properties, + last_partition_id=fresh_partition_spec.last_assigned_field_id, + table_uuid=table_uuid, + ) + return TableMetadataV2( location=location, schemas=[fresh_schema], diff --git a/pyiceberg/table/name_mapping.py b/pyiceberg/table/name_mapping.py index 84a295f5e4..baa15f168d 100644 --- a/pyiceberg/table/name_mapping.py +++ b/pyiceberg/table/name_mapping.py @@ -26,7 +26,7 @@ from abc import ABC, abstractmethod from collections import ChainMap from functools import cached_property, singledispatch -from typing import Any, Dict, Generic, List, TypeVar, Union +from typing import Any, Dict, Generic, Iterator, List, Optional, TypeVar, Union from pydantic import Field, conlist, field_validator, model_serializer @@ -34,8 +34,6 @@ from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel from pyiceberg.types import ListType, MapType, NestedField, PrimitiveType, StructType -SCHEMA_NAME_MAPPING_DEFAULT = "schema.name-mapping.default" - class MappedField(IcebergBaseModel): field_id: int = Field(alias="field-id") @@ -47,6 +45,18 @@ class MappedField(IcebergBaseModel): def convert_null_to_empty_List(cls, v: Any) -> Any: return v or [] + @field_validator('names', mode='after') + @classmethod + def check_at_least_one(cls, v: List[str]) -> Any: + """ + Conlist constraint does not seem to be validating the class on instantiation. + + Adding a custom validator to enforce min_length=1 constraint. 
+ """ + if len(v) < 1: + raise ValueError("At least one mapped name must be provided for the field") + return v + @model_serializer def ser_model(self) -> Dict[str, Any]: """Set custom serializer to leave out the field when it is empty.""" @@ -87,6 +97,10 @@ def __len__(self) -> int: """Return the number of mappings.""" return len(self.root) + def __iter__(self) -> Iterator[MappedField]: + """Iterate over the mapped fields.""" + return iter(self.root) + def __str__(self) -> str: """Convert the name-mapping into a nicely formatted string.""" if len(self.root) == 0: @@ -95,24 +109,25 @@ def __str__(self) -> str: return "[\n " + "\n ".join([str(e) for e in self.root]) + "\n]" +S = TypeVar('S') T = TypeVar("T") -class NameMappingVisitor(Generic[T], ABC): +class NameMappingVisitor(Generic[S, T], ABC): @abstractmethod - def mapping(self, nm: NameMapping, field_results: T) -> T: + def mapping(self, nm: NameMapping, field_results: S) -> S: """Visit a NameMapping.""" @abstractmethod - def fields(self, struct: List[MappedField], field_results: List[T]) -> T: + def fields(self, struct: List[MappedField], field_results: List[T]) -> S: """Visit a List[MappedField].""" @abstractmethod - def field(self, field: MappedField, field_result: T) -> T: + def field(self, field: MappedField, field_result: S) -> T: """Visit a MappedField.""" -class _IndexByName(NameMappingVisitor[Dict[str, MappedField]]): +class _IndexByName(NameMappingVisitor[Dict[str, MappedField], Dict[str, MappedField]]): def mapping(self, nm: NameMapping, field_results: Dict[str, MappedField]) -> Dict[str, MappedField]: return field_results @@ -131,18 +146,18 @@ def field(self, field: MappedField, field_result: Dict[str, MappedField]) -> Dic @singledispatch -def visit_name_mapping(obj: Union[NameMapping, List[MappedField], MappedField], visitor: NameMappingVisitor[T]) -> T: +def visit_name_mapping(obj: Union[NameMapping, List[MappedField], MappedField], visitor: NameMappingVisitor[S, T]) -> S: """Traverse the name mapping in post-order traversal.""" raise NotImplementedError(f"Cannot visit non-type: {obj}") @visit_name_mapping.register(NameMapping) -def _(obj: NameMapping, visitor: NameMappingVisitor[T]) -> T: +def _(obj: NameMapping, visitor: NameMappingVisitor[S, T]) -> S: return visitor.mapping(obj, visit_name_mapping(obj.root, visitor)) @visit_name_mapping.register(list) -def _(fields: List[MappedField], visitor: NameMappingVisitor[T]) -> T: +def _(fields: List[MappedField], visitor: NameMappingVisitor[S, T]) -> S: results = [visitor.field(field, visit_name_mapping(field.fields, visitor)) for field in fields] return visitor.fields(fields, results) @@ -177,5 +192,71 @@ def primitive(self, primitive: PrimitiveType) -> List[MappedField]: return [] +class _UpdateMapping(NameMappingVisitor[List[MappedField], MappedField]): + _updates: Dict[int, NestedField] + _adds: Dict[int, List[NestedField]] + + def __init__(self, updates: Dict[int, NestedField], adds: Dict[int, List[NestedField]]): + self._updates = updates + self._adds = adds + + @staticmethod + def _remove_reassigned_names(field: MappedField, assignments: Dict[str, int]) -> Optional[MappedField]: + removed_names = set() + for name in field.names: + if (assigned_id := assignments.get(name)) and assigned_id != field.field_id: + removed_names.add(name) + + remaining_names = [f for f in field.names if f not in removed_names] + if remaining_names: + return MappedField(field_id=field.field_id, names=remaining_names, fields=field.fields) + else: + return None + + def _add_new_fields(self, 
mapped_fields: List[MappedField], parent_id: int) -> List[MappedField]: + if fields_to_add := self._adds.get(parent_id): + fields: List[MappedField] = [] + new_fields: List[MappedField] = [] + + for add in fields_to_add: + new_fields.append( + MappedField(field_id=add.field_id, names=[add.name], fields=visit(add.field_type, _CreateMapping())) + ) + + reassignments = {f.name: f.field_id for f in fields_to_add} + fields = [ + updated_field + for field in mapped_fields + if (updated_field := self._remove_reassigned_names(field, reassignments)) is not None + ] + new_fields + return fields + else: + return mapped_fields + + def mapping(self, nm: NameMapping, field_results: List[MappedField]) -> List[MappedField]: + return self._add_new_fields(field_results, -1) + + def fields(self, struct: List[MappedField], field_results: List[MappedField]) -> List[MappedField]: + reassignments: Dict[str, int] = { + update.name: update.field_id for f in field_results if (update := self._updates.get(f.field_id)) + } + return [ + updated_field + for field in field_results + if (updated_field := self._remove_reassigned_names(field, reassignments)) is not None + ] + + def field(self, field: MappedField, field_result: List[MappedField]) -> MappedField: + field_names = field.names + if (update := self._updates.get(field.field_id)) is not None and update.name not in field_names: + field_names.append(update.name) + + return MappedField(field_id=field.field_id, names=field_names, fields=self._add_new_fields(field_result, field.field_id)) + + def create_mapping_from_schema(schema: Schema) -> NameMapping: return NameMapping(visit(schema, _CreateMapping())) + + +def update_mapping(mapping: NameMapping, updates: Dict[int, NestedField], adds: Dict[int, List[NestedField]]) -> NameMapping: + return NameMapping(visit_name_mapping(mapping, _UpdateMapping(updates, adds))) diff --git a/pyiceberg/table/sorting.py b/pyiceberg/table/sorting.py index f970d687fa..f74d5bc701 100644 --- a/pyiceberg/table/sorting.py +++ b/pyiceberg/table/sorting.py @@ -168,7 +168,9 @@ def __repr__(self) -> str: UNSORTED_SORT_ORDER = SortOrder(order_id=UNSORTED_SORT_ORDER_ID) -def assign_fresh_sort_order_ids(sort_order: SortOrder, old_schema: Schema, fresh_schema: Schema) -> SortOrder: +def assign_fresh_sort_order_ids( + sort_order: SortOrder, old_schema: Schema, fresh_schema: Schema, sort_order_id: int = INITIAL_SORT_ORDER_ID +) -> SortOrder: if sort_order.is_unsorted: return UNSORTED_SORT_ORDER @@ -189,4 +191,4 @@ def assign_fresh_sort_order_ids(sort_order: SortOrder, old_schema: Schema, fresh ) ) - return SortOrder(*fresh_fields, order_id=INITIAL_SORT_ORDER_ID) + return SortOrder(*fresh_fields, order_id=sort_order_id) diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py index 9f499a3dd4..e678a77e69 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -655,6 +655,11 @@ def _(value: int, _type: IcebergType) -> str: return _int_to_human_string(_type, value) +@_human_string.register(bool) +def _(value: bool, _type: IcebergType) -> str: + return str(value).lower() + + @singledispatch def _int_to_human_string(_type: IcebergType, value: int) -> str: return str(value) diff --git a/pyiceberg/typedef.py b/pyiceberg/typedef.py index 56a3d3c72d..e57bf3490c 100644 --- a/pyiceberg/typedef.py +++ b/pyiceberg/typedef.py @@ -73,7 +73,7 @@ def __missing__(self, key: K) -> V: Identifier = Tuple[str, ...] 
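An aside on the `update_mapping` helper added to `pyiceberg/table/name_mapping.py` above: it rewrites an existing name mapping after schema evolution, where `updates` is keyed by field id and carries the renamed field, and `adds` is keyed by the parent field id (`-1` for the root struct) and carries the newly added fields. The sketch below is not part of the patch; the schema, field ids, and names are illustrative.

```python
from pyiceberg.schema import Schema
from pyiceberg.table.name_mapping import create_mapping_from_schema, update_mapping
from pyiceberg.types import IntegerType, NestedField, StringType

# Illustrative two-column schema and its initial name mapping.
schema = Schema(
    NestedField(field_id=1, name="id", field_type=IntegerType(), required=True),
    NestedField(field_id=2, name="name", field_type=StringType(), required=False),
    schema_id=0,
)
mapping = create_mapping_from_schema(schema)

# Field 2 was renamed to "full_name" and a new top-level field 3 was added to the root (-1).
updated = update_mapping(
    mapping,
    updates={2: NestedField(field_id=2, name="full_name", field_type=StringType(), required=False)},
    adds={-1: [NestedField(field_id=3, name="email", field_type=StringType(), required=False)]},
)
# Field 2 now maps both "name" and "full_name"; field 3 is mapped under "email".
```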
-Properties = Dict[str, str] +Properties = Dict[str, Any] RecursiveDict = Dict[str, Union[str, "RecursiveDict"]] # Represents the literal value diff --git a/pyiceberg/types.py b/pyiceberg/types.py index eb215121dc..746f03ea0b 100644 --- a/pyiceberg/types.py +++ b/pyiceberg/types.py @@ -37,6 +37,7 @@ from typing import ( Any, ClassVar, + Dict, Literal, Optional, Tuple, @@ -61,6 +62,14 @@ FIXED_PARSER = ParseNumberFromBrackets(FIXED) +def transform_dict_value_to_str(dict: Dict[str, Any]) -> Dict[str, str]: + """Transform all values in the dictionary to string. Raise an error if any value is None.""" + for key, value in dict.items(): + if value is None: + raise ValueError(f"None type is not a supported value in properties: {key}") + return {k: str(v) for k, v in dict.items()} + + def _parse_decimal_type(decimal: Any) -> Tuple[int, int]: if isinstance(decimal, str): matches = DECIMAL_REGEX.search(decimal) diff --git a/pyiceberg/utils/config.py b/pyiceberg/utils/config.py index 31ba0b36ed..e038005469 100644 --- a/pyiceberg/utils/config.py +++ b/pyiceberg/utils/config.py @@ -125,8 +125,8 @@ def set_property(_config: RecursiveDict, path: List[str], config_value: str) -> env_var_lower = env_var.lower() if env_var_lower.startswith(PYICEBERG.lower()): key = env_var_lower[len(PYICEBERG) :] - parts = key.split("__") - parts_normalized = [part.replace("_", "-") for part in parts] + parts = key.split("__", maxsplit=2) + parts_normalized = [part.replace('__', '.').replace("_", "-") for part in parts] set_property(config, parts_normalized, config_value) return config diff --git a/pyproject.toml b/pyproject.toml index d1bc82dc62..52c60d9482 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,29 +57,31 @@ sortedcontainers = "2.4.0" fsspec = ">=2023.1.0,<2024.1.0" pyparsing = ">=3.1.0,<4.0.0" zstandard = ">=0.13.0,<1.0.0" +tenacity = ">=8.2.3,<9.0.0" pyarrow = { version = ">=9.0.0,<16.0.0", optional = true } pandas = { version = ">=1.0.0,<3.0.0", optional = true } duckdb = { version = ">=0.5.0,<1.0.0", optional = true } -ray = { version = ">=2.0.0,<2.8.0", optional = true } +ray = { version = ">=2.0.0,<2.10.0", optional = true } python-snappy = { version = ">=0.6.0,<1.0.0", optional = true } thrift = { version = ">=0.13.0,<1.0.0", optional = true } mypy-boto3-glue = { version = ">=1.28.18", optional = true } boto3 = { version = ">=1.24.59", optional = true } s3fs = { version = ">=2023.1.0,<2024.1.0", optional = true } -adlfs = { version = ">=2023.1.0,<2024.1.0", optional = true } +adlfs = { version = ">=2023.1.0,<2024.3.0", optional = true } gcsfs = { version = ">=2023.1.0,<2024.1.0", optional = true } psycopg2-binary = { version = ">=2.9.6", optional = true } sqlalchemy = { version = "^2.0.18", optional = true } +getdaft = { version = ">=0.2.12", optional = true } [tool.poetry.dev-dependencies] pytest = "7.4.4" pytest-checkdocs = "2.10.1" pytest-lazy-fixture = "0.6.3" pre-commit = "3.5.0" -fastavro = "1.9.3" -coverage = { version = "^7.4.1", extras = ["toml"] } +fastavro = "1.9.4" +coverage = { version = "^7.4.2", extras = ["toml"] } requests-mock = "1.11.0" -moto = { version = "^4.2.13", extras = ["server"] } +moto = { version = "^5.0.2", extras = ["server"] } typing-extensions = "4.9.0" pytest-mock = "3.12.0" pyspark = "3.5.0" @@ -105,6 +107,7 @@ pyarrow = ["pyarrow"] pandas = ["pandas", "pyarrow"] duckdb = ["duckdb", "pyarrow"] ray = ["ray", "pyarrow", "pandas"] +daft = ["getdaft"] snappy = ["python-snappy"] hive = ["thrift"] s3fs = ["s3fs"] @@ -263,6 +266,10 @@ ignore_missing_imports = true module = 
"ray.*" ignore_missing_imports = true +[[tool.mypy.overrides]] +module = "daft.*" +ignore_missing_imports = true + [[tool.mypy.overrides]] module = "pyparsing.*" ignore_missing_imports = true @@ -295,13 +302,17 @@ ignore_missing_imports = true module = "setuptools.*" ignore_missing_imports = true +[[tool.mypy.overrides]] +module = "tenacity.*" +ignore_missing_imports = true + [tool.coverage.run] source = ['pyiceberg/'] [tool.ruff] src = ['pyiceberg','tests'] extend-exclude = ["dev/provision.py"] -select = [ +lint.select = [ "E", # pycodestyle "W", # pycodestyle "F", # Pyflakes @@ -311,11 +322,11 @@ select = [ "I", # isort "UP", # pyupgrade ] -ignore = ["E501","E203","B024","B028","UP037"] +lint.ignore = ["E501","E203","B024","B028","UP037"] # Allow autofix for all enabled rules (when `--fix`) is provided. -fixable = ["ALL"] -unfixable = [] +lint.fixable = ["ALL"] +lint.unfixable = [] # Exclude a variety of commonly ignored directories. exclude = [ @@ -341,19 +352,19 @@ exclude = [ "node_modules", "venv", ] -per-file-ignores = {} +lint.per-file-ignores = {} # Ignore _all_ violations. # Same as Black. line-length = 130 # Allow unused variables when underscore-prefixed. -dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" +lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" -[tool.ruff.pyupgrade] +[tool.ruff.lint.pyupgrade] # Preserve types, even if a file imports `from __future__ import annotations`. keep-runtime-typing = true -[tool.ruff.isort] +[tool.ruff.lint.isort] detect-same-package = true lines-between-types = 0 known-first-party = ["pyiceberg", "tests"] diff --git a/tests/avro/test_decoder.py b/tests/avro/test_decoder.py index bbcc7394f4..608e6ae2d5 100644 --- a/tests/avro/test_decoder.py +++ b/tests/avro/test_decoder.py @@ -144,13 +144,13 @@ def test_read_single_byte_at_the_time(decoder_class: Callable[[bytes], BinaryDec @pytest.mark.parametrize("decoder_class", AVAILABLE_DECODERS) def test_read_float(decoder_class: Callable[[bytes], BinaryDecoder]) -> None: - decoder = decoder_class(b"\x00\x00\x9A\x41") + decoder = decoder_class(b"\x00\x00\x9a\x41") assert decoder.read_float() == 19.25 @pytest.mark.parametrize("decoder_class", AVAILABLE_DECODERS) def test_skip_float(decoder_class: Callable[[bytes], BinaryDecoder]) -> None: - decoder = decoder_class(b"\x00\x00\x9A\x41") + decoder = decoder_class(b"\x00\x00\x9a\x41") assert decoder.tell() == 0 decoder.skip_float() assert decoder.tell() == 4 @@ -179,13 +179,13 @@ def test_read_bytes(decoder_class: Callable[[bytes], BinaryDecoder]) -> None: @pytest.mark.parametrize("decoder_class", AVAILABLE_DECODERS) def test_read_utf8(decoder_class: Callable[[bytes], BinaryDecoder]) -> None: - decoder = decoder_class(b"\x04\x76\x6F") + decoder = decoder_class(b"\x04\x76\x6f") assert decoder.read_utf8() == "vo" @pytest.mark.parametrize("decoder_class", AVAILABLE_DECODERS) def test_skip_utf8(decoder_class: Callable[[bytes], BinaryDecoder]) -> None: - decoder = decoder_class(b"\x04\x76\x6F") + decoder = decoder_class(b"\x04\x76\x6f") assert decoder.tell() == 0 decoder.skip_utf8() assert decoder.tell() == 3 @@ -193,7 +193,7 @@ def test_skip_utf8(decoder_class: Callable[[bytes], BinaryDecoder]) -> None: @pytest.mark.parametrize("decoder_class", AVAILABLE_DECODERS) def test_read_int_as_float(decoder_class: Callable[[bytes], BinaryDecoder]) -> None: - decoder = decoder_class(b"\x00\x00\x9A\x41") + decoder = decoder_class(b"\x00\x00\x9a\x41") reader = resolve_reader(FloatType(), DoubleType()) assert reader.read(decoder) == 19.25 diff 
--git a/tests/catalog/integration_test_dynamodb.py b/tests/catalog/integration_test_dynamodb.py index 5ca8767d6d..591e489b83 100644 --- a/tests/catalog/integration_test_dynamodb.py +++ b/tests/catalog/integration_test_dynamodb.py @@ -96,6 +96,15 @@ def test_create_duplicated_table(test_catalog: Catalog, table_schema_nested: Sch test_catalog.create_table((database_name, table_name), table_schema_nested) +def test_create_table_if_not_exists_duplicated_table( + test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str +) -> None: + test_catalog.create_namespace(database_name) + table1 = test_catalog.create_table((database_name, table_name), table_schema_nested) + table2 = test_catalog.create_table_if_not_exists((database_name, table_name), table_schema_nested) + assert table1.identifier == table2.identifier + + def test_load_table(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None: identifier = (database_name, table_name) test_catalog.create_namespace(database_name) diff --git a/tests/catalog/integration_test_glue.py b/tests/catalog/integration_test_glue.py index a56e4c6aaa..a685b7da7b 100644 --- a/tests/catalog/integration_test_glue.py +++ b/tests/catalog/integration_test_glue.py @@ -200,6 +200,15 @@ def test_create_duplicated_table(test_catalog: Catalog, table_schema_nested: Sch test_catalog.create_table((database_name, table_name), table_schema_nested) +def test_create_table_if_not_exists_duplicated_table( + test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str +) -> None: + test_catalog.create_namespace(database_name) + table1 = test_catalog.create_table((database_name, table_name), table_schema_nested) + table2 = test_catalog.create_table_if_not_exists((database_name, table_name), table_schema_nested) + assert table1.identifier == table2.identifier + + def test_load_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None: identifier = (database_name, table_name) test_catalog.create_namespace(database_name) diff --git a/tests/catalog/test_base.py b/tests/catalog/test_base.py index b0c79b4fd3..c23d59e64d 100644 --- a/tests/catalog/test_base.py +++ b/tests/catalog/test_base.py @@ -22,6 +22,7 @@ import pyarrow as pa import pytest +from pydantic_core import ValidationError from pytest_lazyfixture import lazy_fixture from pyiceberg.catalog import ( @@ -46,11 +47,185 @@ SetCurrentSchemaUpdate, Table, TableIdentifier, + update_table_metadata, ) +from pyiceberg.table.metadata import TableMetadataV1 +from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder from pyiceberg.transforms import IdentityTransform from pyiceberg.types import IntegerType, LongType, NestedField +class InMemoryCatalog(Catalog): + """An in-memory catalog implementation for testing purposes.""" + + __tables: Dict[Identifier, Table] + __namespaces: Dict[Identifier, Properties] + + def __init__(self, name: str, **properties: str) -> None: + super().__init__(name, **properties) + self.__tables = {} + self.__namespaces = {} + + def create_table( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + location: Optional[str] = None, + partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC, + sort_order: SortOrder = UNSORTED_SORT_ORDER, + properties: Properties = EMPTY_DICT, + ) -> Table: + schema: Schema = self._convert_schema_if_needed(schema) # type: ignore + + identifier = Catalog.identifier_to_tuple(identifier) + namespace = 
Catalog.namespace_from(identifier) + + if identifier in self.__tables: + raise TableAlreadyExistsError(f"Table already exists: {identifier}") + else: + if namespace not in self.__namespaces: + self.__namespaces[namespace] = {} + + new_location = location or f's3://warehouse/{"/".join(identifier)}/data' + metadata = TableMetadataV1(**{ + "format-version": 1, + "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", + "location": new_location, + "last-updated-ms": 1602638573874, + "last-column-id": schema.highest_field_id, + "schema": schema.model_dump(), + "partition-spec": partition_spec.model_dump()["fields"], + "properties": properties, + "current-snapshot-id": -1, + "snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}], + }) + table = Table( + identifier=identifier, + metadata=metadata, + metadata_location=f's3://warehouse/{"/".join(identifier)}/metadata/metadata.json', + io=load_file_io(), + catalog=self, + ) + self.__tables[identifier] = table + return table + + def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table: + raise NotImplementedError + + def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse: + identifier = tuple(table_request.identifier.namespace.root) + (table_request.identifier.name,) + table = self.__tables[identifier] + table.metadata = update_table_metadata(base_metadata=table.metadata, updates=table_request.updates) + + return CommitTableResponse( + metadata=table.metadata.model_dump(), + metadata_location=table.location(), + ) + + def load_table(self, identifier: Union[str, Identifier]) -> Table: + identifier = self.identifier_to_tuple_without_catalog(identifier) + try: + return self.__tables[identifier] + except KeyError as error: + raise NoSuchTableError(f"Table does not exist: {identifier}") from error + + def drop_table(self, identifier: Union[str, Identifier]) -> None: + identifier = self.identifier_to_tuple_without_catalog(identifier) + try: + self.__tables.pop(identifier) + except KeyError as error: + raise NoSuchTableError(f"Table does not exist: {identifier}") from error + + def purge_table(self, identifier: Union[str, Identifier]) -> None: + self.drop_table(identifier) + + def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table: + from_identifier = self.identifier_to_tuple_without_catalog(from_identifier) + try: + table = self.__tables.pop(from_identifier) + except KeyError as error: + raise NoSuchTableError(f"Table does not exist: {from_identifier}") from error + + to_identifier = Catalog.identifier_to_tuple(to_identifier) + to_namespace = Catalog.namespace_from(to_identifier) + if to_namespace not in self.__namespaces: + self.__namespaces[to_namespace] = {} + + self.__tables[to_identifier] = Table( + identifier=to_identifier, + metadata=table.metadata, + metadata_location=table.metadata_location, + io=load_file_io(), + catalog=self, + ) + return self.__tables[to_identifier] + + def create_namespace(self, namespace: Union[str, Identifier], properties: Properties = EMPTY_DICT) -> None: + namespace = Catalog.identifier_to_tuple(namespace) + if namespace in self.__namespaces: + raise NamespaceAlreadyExistsError(f"Namespace already exists: {namespace}") + else: + self.__namespaces[namespace] = properties if properties else {} + + def drop_namespace(self, namespace: Union[str, Identifier]) -> None: + namespace = Catalog.identifier_to_tuple(namespace) + if [table_identifier for table_identifier in self.__tables.keys() if 
namespace == table_identifier[:-1]]: + raise NamespaceNotEmptyError(f"Namespace is not empty: {namespace}") + try: + self.__namespaces.pop(namespace) + except KeyError as error: + raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}") from error + + def list_tables(self, namespace: Optional[Union[str, Identifier]] = None) -> List[Identifier]: + if namespace: + namespace = Catalog.identifier_to_tuple(namespace) + list_tables = [table_identifier for table_identifier in self.__tables.keys() if namespace == table_identifier[:-1]] + else: + list_tables = list(self.__tables.keys()) + + return list_tables + + def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identifier]: + # Hierarchical namespace is not supported. Return an empty list + if namespace: + return [] + + return list(self.__namespaces.keys()) + + def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties: + namespace = Catalog.identifier_to_tuple(namespace) + try: + return self.__namespaces[namespace] + except KeyError as error: + raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}") from error + + def update_namespace_properties( + self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Properties = EMPTY_DICT + ) -> PropertiesUpdateSummary: + removed: Set[str] = set() + updated: Set[str] = set() + + namespace = Catalog.identifier_to_tuple(namespace) + if namespace in self.__namespaces: + if removals: + for key in removals: + if key in self.__namespaces[namespace]: + del self.__namespaces[namespace][key] + removed.add(key) + if updates: + for key, value in updates.items(): + self.__namespaces[namespace][key] = value + updated.add(key) + else: + raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}") + + expected_to_change = removed.difference(removals or set()) + + return PropertiesUpdateSummary( + removed=list(removed or []), updated=list(updates.keys() if updates else []), missing=list(expected_to_change) + ) + + @pytest.fixture def catalog(tmp_path: PosixPath) -> InMemoryCatalog: return InMemoryCatalog("test.in_memory.catalog", **{WAREHOUSE: tmp_path.absolute().as_posix(), "test.key": "test.value"}) @@ -74,12 +249,15 @@ def catalog(tmp_path: PosixPath) -> InMemoryCatalog: NAMESPACE_NOT_EMPTY_ERROR = "Namespace is not empty: \\('com', 'organization', 'department'\\)" -def given_catalog_has_a_table(catalog: InMemoryCatalog) -> Table: +def given_catalog_has_a_table( + catalog: InMemoryCatalog, + properties: Properties = EMPTY_DICT, +) -> Table: return catalog.create_table( identifier=TEST_TABLE_IDENTIFIER, schema=TEST_TABLE_SCHEMA, partition_spec=TEST_TABLE_PARTITION_SPEC, - properties=TEST_TABLE_PROPERTIES, + properties=properties or TEST_TABLE_PROPERTIES, ) @@ -413,12 +591,9 @@ def test_commit_table(catalog: InMemoryCatalog) -> None: # Then assert response.metadata.table_uuid == given_table.metadata.table_uuid - assert given_table.schema().schema_id == 1 - assert given_table.metadata.current_schema_id == 1 assert len(response.metadata.schemas) == 2 assert response.metadata.schemas[1] == new_schema - assert response.metadata.schemas[1].schema_id == 1 - assert given_table.metadata.last_column_id == new_schema.highest_field_id + assert response.metadata.current_schema_id == new_schema.schema_id def test_add_column(catalog: InMemoryCatalog) -> None: @@ -481,4 +656,18 @@ def test_add_column_with_statement(catalog: InMemoryCatalog) -> None: def test_catalog_repr(catalog: InMemoryCatalog) -> None: s = repr(catalog) - 
assert s == "test.in_memory.catalog ()" + assert s == "test.in.memory.catalog ()" + + +def test_table_properties_int_value(catalog: InMemoryCatalog) -> None: + # table properties can be set to int, but still serialized to string + property_with_int = {"property_name": 42} + given_table = given_catalog_has_a_table(catalog, properties=property_with_int) + assert isinstance(given_table.properties["property_name"], str) + + +def test_table_properties_raise_for_none_value(catalog: InMemoryCatalog) -> None: + property_with_none = {"property_name": None} + with pytest.raises(ValidationError) as exc_info: + _ = given_catalog_has_a_table(catalog, properties=property_with_none) + assert "None type is not a supported value in properties: property_name" in str(exc_info.value) diff --git a/tests/catalog/test_dynamodb.py b/tests/catalog/test_dynamodb.py index bc801463c5..218b0e8be7 100644 --- a/tests/catalog/test_dynamodb.py +++ b/tests/catalog/test_dynamodb.py @@ -20,7 +20,7 @@ import boto3 import pyarrow as pa import pytest -from moto import mock_dynamodb +from moto import mock_aws from pyiceberg.catalog import METADATA_LOCATION, TABLE_TYPE from pyiceberg.catalog.dynamodb import ( @@ -45,7 +45,7 @@ from tests.conftest import BUCKET_NAME, TABLE_METADATA_LOCATION_REGEX -@mock_dynamodb +@mock_aws def test_create_dynamodb_catalog_with_table_name(_dynamodb, _bucket_initialize: None) -> None: # type: ignore DynamoDbCatalog("test_ddb_catalog") response = _dynamodb.describe_table(TableName=DYNAMODB_TABLE_NAME_DEFAULT) @@ -59,7 +59,7 @@ def test_create_dynamodb_catalog_with_table_name(_dynamodb, _bucket_initialize: assert response["Table"]["TableStatus"] == ACTIVE -@mock_dynamodb +@mock_aws def test_create_table_with_database_location( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -72,7 +72,7 @@ def test_create_table_with_database_location( assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location) -@mock_dynamodb +@mock_aws def test_create_table_with_pyarrow_schema( _bucket_initialize: None, moto_endpoint_url: str, @@ -89,7 +89,7 @@ def test_create_table_with_pyarrow_schema( assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location) -@mock_dynamodb +@mock_aws def test_create_table_with_default_warehouse( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -102,7 +102,7 @@ def test_create_table_with_default_warehouse( assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location) -@mock_dynamodb +@mock_aws def test_create_table_with_given_location( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -117,7 +117,7 @@ def test_create_table_with_given_location( assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location) -@mock_dynamodb +@mock_aws def test_create_table_with_no_location( _bucket_initialize: None, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -128,7 +128,7 @@ def test_create_table_with_no_location( test_catalog.create_table(identifier=identifier, schema=table_schema_nested) -@mock_dynamodb +@mock_aws def test_create_table_with_strips( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -141,7 +141,7 @@ def test_create_table_with_strips( assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location) -@mock_dynamodb 
+@mock_aws def test_create_table_with_strips_bucket_root( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -154,7 +154,7 @@ def test_create_table_with_strips_bucket_root( assert TABLE_METADATA_LOCATION_REGEX.match(table_strip.metadata_location) -@mock_dynamodb +@mock_aws def test_create_table_with_no_database( _bucket_initialize: None, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -164,7 +164,7 @@ def test_create_table_with_no_database( test_catalog.create_table(identifier=identifier, schema=table_schema_nested) -@mock_dynamodb +@mock_aws def test_create_duplicated_table( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -176,7 +176,19 @@ def test_create_duplicated_table( test_catalog.create_table(identifier, table_schema_nested) -@mock_dynamodb +@mock_aws +def test_create_table_if_not_exists_duplicated_table( + _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str +) -> None: + identifier = (database_name, table_name) + test_catalog = DynamoDbCatalog("test_ddb_catalog", **{"warehouse": f"s3://{BUCKET_NAME}", "s3.endpoint": moto_endpoint_url}) + test_catalog.create_namespace(namespace=database_name) + table1 = test_catalog.create_table(identifier, table_schema_nested) + table2 = test_catalog.create_table_if_not_exists(identifier, table_schema_nested) + assert table1.identifier == table2.identifier + + +@mock_aws def test_load_table( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -190,7 +202,7 @@ def test_load_table( assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location) -@mock_dynamodb +@mock_aws def test_load_table_from_self_identifier( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -205,7 +217,7 @@ def test_load_table_from_self_identifier( assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location) -@mock_dynamodb +@mock_aws def test_load_non_exist_table(_bucket_initialize: None, database_name: str, table_name: str) -> None: identifier = (database_name, table_name) test_catalog = DynamoDbCatalog("test_ddb_catalog", warehouse=f"s3://{BUCKET_NAME}") @@ -214,7 +226,7 @@ def test_load_non_exist_table(_bucket_initialize: None, database_name: str, tabl test_catalog.load_table(identifier) -@mock_dynamodb +@mock_aws def test_drop_table( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -231,7 +243,7 @@ def test_drop_table( test_catalog.load_table(identifier) -@mock_dynamodb +@mock_aws def test_drop_table_from_self_identifier( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -250,7 +262,7 @@ def test_drop_table_from_self_identifier( test_catalog.load_table(table.identifier) -@mock_dynamodb +@mock_aws def test_drop_non_exist_table(_bucket_initialize: None, database_name: str, table_name: str) -> None: identifier = (database_name, table_name) test_catalog = DynamoDbCatalog("test_ddb_catalog", warehouse=f"s3://{BUCKET_NAME}") @@ -258,7 +270,7 @@ def test_drop_non_exist_table(_bucket_initialize: None, database_name: str, tabl test_catalog.drop_table(identifier) -@mock_dynamodb +@mock_aws def test_rename_table( 
_bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -281,7 +293,7 @@ def test_rename_table( test_catalog.load_table(identifier) -@mock_dynamodb +@mock_aws def test_rename_table_from_self_identifier( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -306,7 +318,7 @@ def test_rename_table_from_self_identifier( test_catalog.load_table(table.identifier) -@mock_dynamodb +@mock_aws def test_fail_on_rename_table_with_missing_required_params(_bucket_initialize: None, database_name: str, table_name: str) -> None: new_database_name = f"{database_name}_new" new_table_name = f"{table_name}_new" @@ -330,7 +342,7 @@ def test_fail_on_rename_table_with_missing_required_params(_bucket_initialize: N test_catalog.rename_table(identifier, new_identifier) -@mock_dynamodb +@mock_aws def test_fail_on_rename_non_iceberg_table( _dynamodb: boto3.client, _bucket_initialize: None, database_name: str, table_name: str ) -> None: @@ -359,7 +371,7 @@ def test_fail_on_rename_non_iceberg_table( test_catalog.rename_table(identifier, new_identifier) -@mock_dynamodb +@mock_aws def test_list_tables( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_list: List[str] ) -> None: @@ -372,7 +384,7 @@ def test_list_tables( assert (database_name, table_name) in loaded_table_list -@mock_dynamodb +@mock_aws def test_list_namespaces(_bucket_initialize: None, database_list: List[str]) -> None: test_catalog = DynamoDbCatalog("test_ddb_catalog") for database_name in database_list: @@ -382,7 +394,7 @@ def test_list_namespaces(_bucket_initialize: None, database_list: List[str]) -> assert (database_name,) in loaded_database_list -@mock_dynamodb +@mock_aws def test_create_namespace_no_properties(_bucket_initialize: None, database_name: str) -> None: test_catalog = DynamoDbCatalog("test_ddb_catalog") test_catalog.create_namespace(namespace=database_name) @@ -393,7 +405,7 @@ def test_create_namespace_no_properties(_bucket_initialize: None, database_name: assert properties == {} -@mock_dynamodb +@mock_aws def test_create_namespace_with_comment_and_location(_bucket_initialize: None, database_name: str) -> None: test_location = f"s3://{BUCKET_NAME}/{database_name}.db" test_properties = { @@ -410,7 +422,7 @@ def test_create_namespace_with_comment_and_location(_bucket_initialize: None, da assert properties["location"] == test_location -@mock_dynamodb +@mock_aws def test_create_duplicated_namespace(_bucket_initialize: None, database_name: str) -> None: test_catalog = DynamoDbCatalog("test_ddb_catalog") test_catalog.create_namespace(namespace=database_name) @@ -421,7 +433,7 @@ def test_create_duplicated_namespace(_bucket_initialize: None, database_name: st test_catalog.create_namespace(namespace=database_name, properties={"test": "test"}) -@mock_dynamodb +@mock_aws def test_drop_namespace(_bucket_initialize: None, database_name: str) -> None: test_catalog = DynamoDbCatalog("test_ddb_catalog") test_catalog.create_namespace(namespace=database_name) @@ -433,7 +445,7 @@ def test_drop_namespace(_bucket_initialize: None, database_name: str) -> None: assert len(loaded_database_list) == 0 -@mock_dynamodb +@mock_aws def test_drop_non_empty_namespace( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -446,14 +458,14 @@ def test_drop_non_empty_namespace( 
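The `test_create_table_if_not_exists_duplicated_table` cases added across the catalog test modules all exercise the same idempotent-create API. A minimal sketch of the pattern, not taken from the patch; the catalog name, identifier, and schema are illustrative.

```python
from pyiceberg.catalog import load_catalog
from pyiceberg.schema import Schema
from pyiceberg.types import LongType, NestedField

catalog = load_catalog("default")  # any configured catalog; the name is illustrative
schema = Schema(NestedField(field_id=1, name="id", field_type=LongType(), required=True))

first = catalog.create_table(("db", "events"), schema)
# Unlike create_table, a second call does not raise TableAlreadyExistsError;
# it returns the already-existing table, so both handles point at the same table.
second = catalog.create_table_if_not_exists(("db", "events"), schema)
assert first.identifier == second.identifier
```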
test_catalog.drop_namespace(database_name) -@mock_dynamodb +@mock_aws def test_drop_non_exist_namespace(_bucket_initialize: None, database_name: str) -> None: test_catalog = DynamoDbCatalog("test_ddb_catalog") with pytest.raises(NoSuchNamespaceError): test_catalog.drop_namespace(database_name) -@mock_dynamodb +@mock_aws def test_load_namespace_properties(_bucket_initialize: None, database_name: str) -> None: test_location = f"s3://{BUCKET_NAME}/{database_name}.db" test_properties = { @@ -471,14 +483,14 @@ def test_load_namespace_properties(_bucket_initialize: None, database_name: str) assert v == test_properties[k] -@mock_dynamodb +@mock_aws def test_load_non_exist_namespace_properties(_bucket_initialize: None, database_name: str) -> None: test_catalog = DynamoDbCatalog("test_ddb_catalog") with pytest.raises(NoSuchNamespaceError): test_catalog.load_namespace_properties(database_name) -@mock_dynamodb +@mock_aws def test_update_namespace_properties(_bucket_initialize: None, database_name: str) -> None: test_properties = { "comment": "this is a test description", @@ -503,7 +515,7 @@ def test_update_namespace_properties(_bucket_initialize: None, database_name: st test_catalog.drop_namespace(database_name) -@mock_dynamodb +@mock_aws def test_load_empty_namespace_properties(_bucket_initialize: None, database_name: str) -> None: test_catalog = DynamoDbCatalog("test_ddb_catalog") test_catalog.create_namespace(database_name) @@ -511,7 +523,7 @@ def test_load_empty_namespace_properties(_bucket_initialize: None, database_name assert listed_properties == {} -@mock_dynamodb +@mock_aws def test_update_namespace_properties_overlap_update_removal(_bucket_initialize: None, database_name: str) -> None: test_properties = { "comment": "this is a test description", diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py index 63a213f94f..6e0196c1a2 100644 --- a/tests/catalog/test_glue.py +++ b/tests/catalog/test_glue.py @@ -20,7 +20,7 @@ import boto3 import pyarrow as pa import pytest -from moto import mock_glue +from moto import mock_aws from pyiceberg.catalog.glue import GlueCatalog from pyiceberg.exceptions import ( @@ -37,7 +37,7 @@ from tests.conftest import BUCKET_NAME, TABLE_METADATA_LOCATION_REGEX -@mock_glue +@mock_aws def test_create_table_with_database_location( _glue: boto3.client, _bucket_initialize: None, @@ -72,7 +72,39 @@ def test_create_table_with_database_location( assert storage_descriptor["Location"] == f"s3://{BUCKET_NAME}/{database_name}.db/{table_name}" -@mock_glue +@mock_aws +def test_create_v1_table( + _bucket_initialize: None, + _glue: boto3.client, + moto_endpoint_url: str, + table_schema_nested: Schema, + database_name: str, + table_name: str, +) -> None: + catalog_name = "glue" + test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url}) + test_catalog.create_namespace(namespace=database_name, properties={"location": f"s3://{BUCKET_NAME}/{database_name}.db"}) + table = test_catalog.create_table((database_name, table_name), table_schema_nested, properties={"format-version": "1"}) + assert table.format_version == 1 + + table_info = _glue.get_table( + DatabaseName=database_name, + Name=table_name, + ) + + storage_descriptor = table_info["Table"]["StorageDescriptor"] + columns = storage_descriptor["Columns"] + assert len(columns) == len(table_schema_nested.fields) + assert columns[0] == { + "Name": "foo", + "Type": "string", + "Parameters": {"iceberg.field.id": "1", "iceberg.field.optional": "true", "iceberg.field.current": "true"}, + } + + assert 
storage_descriptor["Location"] == f"s3://{BUCKET_NAME}/{database_name}.db/{table_name}" + + +@mock_aws def test_create_table_with_default_warehouse( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -86,7 +118,7 @@ def test_create_table_with_default_warehouse( assert test_catalog._parse_metadata_version(table.metadata_location) == 0 -@mock_glue +@mock_aws def test_create_table_with_given_location( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -102,7 +134,7 @@ def test_create_table_with_given_location( assert test_catalog._parse_metadata_version(table.metadata_location) == 0 -@mock_glue +@mock_aws def test_create_table_with_pyarrow_schema( _bucket_initialize: None, moto_endpoint_url: str, @@ -124,7 +156,7 @@ def test_create_table_with_pyarrow_schema( assert test_catalog._parse_metadata_version(table.metadata_location) == 0 -@mock_glue +@mock_aws def test_create_table_with_no_location( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -136,7 +168,7 @@ def test_create_table_with_no_location( test_catalog.create_table(identifier=identifier, schema=table_schema_nested) -@mock_glue +@mock_aws def test_create_table_with_strips( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -150,7 +182,7 @@ def test_create_table_with_strips( assert test_catalog._parse_metadata_version(table.metadata_location) == 0 -@mock_glue +@mock_aws def test_create_table_with_strips_bucket_root( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -164,7 +196,7 @@ def test_create_table_with_strips_bucket_root( assert test_catalog._parse_metadata_version(table_strip.metadata_location) == 0 -@mock_glue +@mock_aws def test_create_table_with_no_database( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -174,7 +206,7 @@ def test_create_table_with_no_database( test_catalog.create_table(identifier=identifier, schema=table_schema_nested) -@mock_glue +@mock_aws def test_create_duplicated_table( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -186,7 +218,7 @@ def test_create_duplicated_table( test_catalog.create_table(identifier, table_schema_nested) -@mock_glue +@mock_aws def test_load_table( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -201,7 +233,7 @@ def test_load_table( assert test_catalog._parse_metadata_version(table.metadata_location) == 0 -@mock_glue +@mock_aws def test_load_table_from_self_identifier( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -215,7 +247,7 @@ def test_load_table_from_self_identifier( assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location) -@mock_glue +@mock_aws def test_load_non_exist_table(_bucket_initialize: None, moto_endpoint_url: str, database_name: str, table_name: str) -> None: identifier = (database_name, table_name) test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}/"}) @@ -224,7 +256,7 @@ def 
test_load_non_exist_table(_bucket_initialize: None, moto_endpoint_url: str, test_catalog.load_table(identifier) -@mock_glue +@mock_aws def test_drop_table( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -241,7 +273,7 @@ def test_drop_table( test_catalog.load_table(identifier) -@mock_glue +@mock_aws def test_drop_table_from_self_identifier( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -260,7 +292,7 @@ def test_drop_table_from_self_identifier( test_catalog.load_table(table.identifier) -@mock_glue +@mock_aws def test_drop_non_exist_table(_bucket_initialize: None, moto_endpoint_url: str, database_name: str, table_name: str) -> None: identifier = (database_name, table_name) test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}/"}) @@ -268,7 +300,7 @@ def test_drop_non_exist_table(_bucket_initialize: None, moto_endpoint_url: str, test_catalog.drop_table(identifier) -@mock_glue +@mock_aws def test_rename_table( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -292,7 +324,7 @@ def test_rename_table( test_catalog.load_table(identifier) -@mock_glue +@mock_aws def test_rename_table_from_self_identifier( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -317,7 +349,7 @@ def test_rename_table_from_self_identifier( test_catalog.load_table(table.identifier) -@mock_glue +@mock_aws def test_rename_table_no_params( _glue: boto3.client, _bucket_initialize: None, moto_endpoint_url: str, database_name: str, table_name: str ) -> None: @@ -336,7 +368,7 @@ def test_rename_table_no_params( test_catalog.rename_table(identifier, new_identifier) -@mock_glue +@mock_aws def test_rename_non_iceberg_table( _glue: boto3.client, _bucket_initialize: None, moto_endpoint_url: str, database_name: str, table_name: str ) -> None: @@ -359,7 +391,7 @@ def test_rename_non_iceberg_table( test_catalog.rename_table(identifier, new_identifier) -@mock_glue +@mock_aws def test_list_tables( _bucket_initialize: None, moto_endpoint_url: str, @@ -377,7 +409,7 @@ def test_list_tables( assert (database_name, table_name) in loaded_table_list -@mock_glue +@mock_aws def test_list_namespaces(_bucket_initialize: None, moto_endpoint_url: str, database_list: List[str]) -> None: test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url}) for database_name in database_list: @@ -387,7 +419,7 @@ def test_list_namespaces(_bucket_initialize: None, moto_endpoint_url: str, datab assert (database_name,) in loaded_database_list -@mock_glue +@mock_aws def test_create_namespace_no_properties(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url}) test_catalog.create_namespace(namespace=database_name) @@ -398,7 +430,7 @@ def test_create_namespace_no_properties(_bucket_initialize: None, moto_endpoint_ assert properties == {} -@mock_glue +@mock_aws def test_create_namespace_with_comment_and_location(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: test_location = f"s3://{BUCKET_NAME}/{database_name}.db" test_properties = { @@ -415,7 +447,7 @@ def test_create_namespace_with_comment_and_location(_bucket_initialize: None, mo assert properties["location"] == 
test_location -@mock_glue +@mock_aws def test_create_duplicated_namespace(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url}) test_catalog.create_namespace(namespace=database_name) @@ -426,7 +458,7 @@ def test_create_duplicated_namespace(_bucket_initialize: None, moto_endpoint_url test_catalog.create_namespace(namespace=database_name, properties={"test": "test"}) -@mock_glue +@mock_aws def test_drop_namespace(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url}) test_catalog.create_namespace(namespace=database_name) @@ -438,7 +470,7 @@ def test_drop_namespace(_bucket_initialize: None, moto_endpoint_url: str, databa assert len(loaded_database_list) == 0 -@mock_glue +@mock_aws def test_drop_non_empty_namespace( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: @@ -451,14 +483,14 @@ def test_drop_non_empty_namespace( test_catalog.drop_namespace(database_name) -@mock_glue +@mock_aws def test_drop_non_exist_namespace(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url}) with pytest.raises(NoSuchNamespaceError): test_catalog.drop_namespace(database_name) -@mock_glue +@mock_aws def test_load_namespace_properties(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: test_location = f"s3://{BUCKET_NAME}/{database_name}.db" test_properties = { @@ -476,14 +508,14 @@ def test_load_namespace_properties(_bucket_initialize: None, moto_endpoint_url: assert v == test_properties[k] -@mock_glue +@mock_aws def test_load_non_exist_namespace_properties(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url}) with pytest.raises(NoSuchNamespaceError): test_catalog.load_namespace_properties(database_name) -@mock_glue +@mock_aws def test_update_namespace_properties(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: test_properties = { "comment": "this is a test description", @@ -508,7 +540,7 @@ def test_update_namespace_properties(_bucket_initialize: None, moto_endpoint_url test_catalog.drop_namespace(database_name) -@mock_glue +@mock_aws def test_load_empty_namespace_properties(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url}) test_catalog.create_namespace(database_name) @@ -516,7 +548,7 @@ def test_load_empty_namespace_properties(_bucket_initialize: None, moto_endpoint assert listed_properties == {} -@mock_glue +@mock_aws def test_load_default_namespace_properties(_glue, _bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None: # type: ignore # simulate creating database with default settings through AWS Glue Web Console _glue.create_database(DatabaseInput={"Name": database_name}) @@ -525,7 +557,7 @@ def test_load_default_namespace_properties(_glue, _bucket_initialize: None, moto assert listed_properties == {} -@mock_glue +@mock_aws def test_update_namespace_properties_overlap_update_removal( _bucket_initialize: None, moto_endpoint_url: str, database_name: str ) -> None: @@ -546,7 +578,7 @@ def test_update_namespace_properties_overlap_update_removal( assert 
test_catalog.load_namespace_properties(database_name) == test_properties -@mock_glue +@mock_aws def test_passing_profile_name() -> None: session_properties: Dict[str, Any] = { "aws_access_key_id": "abc", @@ -566,7 +598,7 @@ def test_passing_profile_name() -> None: assert test_catalog.glue is mock_session().client() -@mock_glue +@mock_aws def test_commit_table_update_schema( _glue: boto3.client, _bucket_initialize: None, @@ -619,7 +651,7 @@ def test_commit_table_update_schema( assert storage_descriptor["Location"] == f"s3://{BUCKET_NAME}/{database_name}.db/{table_name}" -@mock_glue +@mock_aws def test_commit_table_properties( _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str ) -> None: diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index f42962f0f3..e59b7599bc 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -43,7 +43,7 @@ ) from pyiceberg.partitioning import PartitionField, PartitionSpec from pyiceberg.schema import Schema -from pyiceberg.table.metadata import TableMetadataUtil, TableMetadataV2 +from pyiceberg.table.metadata import TableMetadataUtil, TableMetadataV1, TableMetadataV2 from pyiceberg.table.refs import SnapshotRef, SnapshotRefType from pyiceberg.table.snapshots import ( MetadataLogEntry, @@ -277,7 +277,7 @@ def test_create_table(table_schema_simple: Schema, hive_database: HiveDatabase, ) ], current_schema_id=0, - last_partition_id=1000, + last_partition_id=999, properties={"owner": "javaberg", 'write.parquet.compression-codec': 'zstd'}, partition_specs=[PartitionSpec()], default_spec_id=0, @@ -295,6 +295,59 @@ def test_create_table(table_schema_simple: Schema, hive_database: HiveDatabase, assert metadata.model_dump() == expected.model_dump() +@patch("time.time", MagicMock(return_value=12345)) +def test_create_v1_table(table_schema_simple: Schema, hive_database: HiveDatabase, hive_table: HiveTable) -> None: + catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) + + catalog._client = MagicMock() + catalog._client.__enter__().create_table.return_value = None + catalog._client.__enter__().get_table.return_value = hive_table + catalog._client.__enter__().get_database.return_value = hive_database + catalog.create_table( + ("default", "table"), schema=table_schema_simple, properties={"owner": "javaberg", "format-version": "1"} + ) + + # Test creating V1 table + called_v1_table: HiveTable = catalog._client.__enter__().create_table.call_args[0][0] + metadata_location = called_v1_table.parameters["metadata_location"] + with open(metadata_location, encoding=UTF8) as f: + payload = f.read() + + expected_schema = Schema( + NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), + schema_id=0, + identifier_field_ids=[2], + ) + actual_v1_metadata = TableMetadataUtil.parse_raw(payload) + expected_spec = PartitionSpec() + expected_v1_metadata = TableMetadataV1( + location=actual_v1_metadata.location, + table_uuid=actual_v1_metadata.table_uuid, + last_updated_ms=actual_v1_metadata.last_updated_ms, + last_column_id=3, + schema=expected_schema, + schemas=[expected_schema], + current_schema_id=0, + last_partition_id=999, + properties={"owner": "javaberg", "write.parquet.compression-codec": "zstd"}, + partition_spec=[], + partition_specs=[expected_spec], + default_spec_id=0, + 
current_snapshot_id=None, + snapshots=[], + snapshot_log=[], + metadata_log=[], + sort_orders=[SortOrder(order_id=0)], + default_sort_order_id=0, + refs={}, + format_version=1, + ) + + assert actual_v1_metadata.model_dump() == expected_v1_metadata.model_dump() + + def test_load_table(hive_table: HiveTable) -> None: catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 248cc14d88..c4668a71ec 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=redefined-outer-name,unused-argument import os -from typing import Any, Dict, cast +from typing import Any, Callable, Dict, cast from unittest import mock import pytest @@ -26,6 +26,7 @@ from pyiceberg.catalog import PropertiesUpdateSummary, Table, load_catalog from pyiceberg.catalog.rest import AUTH_URL, RestCatalog from pyiceberg.exceptions import ( + AuthorizationExpiredError, NamespaceAlreadyExistsError, NoSuchNamespaceError, NoSuchTableError, @@ -107,6 +108,24 @@ def test_token_200(rest_mock: Mocker) -> None: "token_type": "Bearer", "expires_in": 86400, "issued_token_type": "urn:ietf:params:oauth:token-type:access_token", + "scope": "openid offline", + "refresh_token": "refresh_token", + }, + status_code=200, + request_headers=OAUTH_TEST_HEADERS, + ) + assert ( + RestCatalog("rest", uri=TEST_URI, credential=TEST_CREDENTIALS)._session.headers["Authorization"] # pylint: disable=W0212 + == f"Bearer {TEST_TOKEN}" + ) + + +def test_token_200_without_optional_fields(rest_mock: Mocker) -> None: + rest_mock.post( + f"{TEST_URI}v1/oauth/tokens", + json={ + "access_token": TEST_TOKEN, + "token_type": "Bearer", }, status_code=200, request_headers=OAUTH_TEST_HEADERS, @@ -166,6 +185,45 @@ def test_config_200(requests_mock: Mocker) -> None: assert history[1].url == "https://iceberg-test-catalog/v1/config?warehouse=s3%3A%2F%2Fsome-bucket" +def test_properties_sets_headers(requests_mock: Mocker) -> None: + requests_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + + catalog = RestCatalog( + "rest", uri=TEST_URI, warehouse="s3://some-bucket", **{"header.Content-Type": "application/vnd.api+json"} + ) + + assert ( + catalog._session.headers.get("Content-type") == "application/vnd.api+json" + ), "Expected 'Content-Type' header to be 'application/vnd.api+json'" + + assert ( + requests_mock.last_request.headers["Content-type"] == "application/vnd.api+json" + ), "Config request did not include expected 'Content-Type' header" + + +def test_config_sets_headers(requests_mock: Mocker) -> None: + namespace = "leden" + requests_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {"header.Content-Type": "application/vnd.api+json"}, "overrides": {}}, + status_code=200, + ) + requests_mock.post(f"{TEST_URI}v1/namespaces", json={"namespace": [namespace], "properties": {}}, status_code=200) + catalog = RestCatalog("rest", uri=TEST_URI, warehouse="s3://some-bucket") + catalog.create_namespace(namespace) + + assert ( + catalog._session.headers.get("Content-type") == "application/vnd.api+json" + ), "Expected 'Content-Type' header to be 'application/vnd.api+json'" + assert ( + requests_mock.last_request.headers["Content-type"] == "application/vnd.api+json" + ), "Create namespace request did not include expected 'Content-Type' header" + + def test_token_400(rest_mock: Mocker) -> None: rest_mock.post( f"{TEST_URI}v1/oauth/tokens", @@ -266,6 +324,68 @@ def 
test_list_namespace_with_parent_200(rest_mock: Mocker) -> None: ] +def test_list_namespaces_token_expired(rest_mock: Mocker) -> None: + new_token = "new_jwt_token" + new_header = dict(TEST_HEADERS) + new_header["Authorization"] = f"Bearer {new_token}" + + namespaces = rest_mock.register_uri( + "GET", + f"{TEST_URI}v1/namespaces", + [ + { + "status_code": 419, + "json": { + "error": { + "message": "Authorization expired.", + "type": "AuthorizationExpiredError", + "code": 419, + } + }, + "headers": TEST_HEADERS, + }, + { + "status_code": 200, + "json": {"namespaces": [["default"], ["examples"], ["fokko"], ["system"]]}, + "headers": new_header, + }, + { + "status_code": 200, + "json": {"namespaces": [["default"], ["examples"], ["fokko"], ["system"]]}, + "headers": new_header, + }, + ], + ) + tokens = rest_mock.post( + f"{TEST_URI}v1/oauth/tokens", + json={ + "access_token": new_token, + "token_type": "Bearer", + "expires_in": 86400, + "issued_token_type": "urn:ietf:params:oauth:token-type:access_token", + }, + status_code=200, + ) + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, credential=TEST_CREDENTIALS) + assert catalog.list_namespaces() == [ + ("default",), + ("examples",), + ("fokko",), + ("system",), + ] + assert namespaces.call_count == 2 + assert tokens.call_count == 1 + + assert catalog.list_namespaces() == [ + ("default",), + ("examples",), + ("fokko",), + ("system",), + ] + assert namespaces.call_count == 3 + assert tokens.call_count == 1 + + def test_create_namespace_200(rest_mock: Mocker) -> None: namespace = "leden" rest_mock.post( @@ -517,6 +637,93 @@ def test_create_table_409(rest_mock: Mocker, table_schema_simple: Schema) -> Non assert "Table already exists" in str(e.value) +def test_create_table_if_not_exists_200( + rest_mock: Mocker, table_schema_simple: Schema, example_table_metadata_no_snapshot_v1_rest_json: Dict[str, Any] +) -> None: + def json_callback() -> Callable[[Any, Any], Dict[str, Any]]: + call_count = 0 + + def callback(request: Any, context: Any) -> Dict[str, Any]: + nonlocal call_count + call_count += 1 + + if call_count == 1: + context.status_code = 200 + return example_table_metadata_no_snapshot_v1_rest_json + else: + context.status_code = 409 + return { + "error": { + "message": "Table already exists: fokko.already_exists in warehouse 8bcb0838-50fc-472d-9ddb-8feb89ef5f1e", + "type": "AlreadyExistsException", + "code": 409, + } + } + + return callback + + rest_mock.post( + f"{TEST_URI}v1/namespaces/fokko/tables", + json=json_callback(), + request_headers=TEST_HEADERS, + ) + rest_mock.get( + f"{TEST_URI}v1/namespaces/fokko/tables/fokko2", + json=example_table_metadata_no_snapshot_v1_rest_json, + status_code=200, + request_headers=TEST_HEADERS, + ) + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN) + table1 = catalog.create_table( + identifier=("fokko", "fokko2"), + schema=table_schema_simple, + location=None, + partition_spec=PartitionSpec( + PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=3), name="id"), spec_id=1 + ), + sort_order=SortOrder(SortField(source_id=2, transform=IdentityTransform())), + properties={"owner": "fokko"}, + ) + table2 = catalog.create_table_if_not_exists( + identifier=("fokko", "fokko2"), + schema=table_schema_simple, + location=None, + partition_spec=PartitionSpec(PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=3), name="id")), + sort_order=SortOrder(SortField(source_id=2, transform=IdentityTransform())), + properties={"owner": "fokko"}, + ) + 
assert table1 == table2 + + +def test_create_table_419(rest_mock: Mocker, table_schema_simple: Schema) -> None: + rest_mock.post( + f"{TEST_URI}v1/namespaces/fokko/tables", + json={ + "error": { + "message": "Authorization expired.", + "type": "AuthorizationExpiredError", + "code": 419, + } + }, + status_code=419, + request_headers=TEST_HEADERS, + ) + + with pytest.raises(AuthorizationExpiredError) as e: + RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).create_table( + identifier=("fokko", "fokko2"), + schema=table_schema_simple, + location=None, + partition_spec=PartitionSpec( + PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=3), name="id") + ), + sort_order=SortOrder(SortField(source_id=2, transform=IdentityTransform())), + properties={"owner": "fokko"}, + ) + assert "Authorization expired" in str(e.value) + assert rest_mock.call_count == 3 + + def test_register_table_200( rest_mock: Mocker, table_schema_simple: Schema, example_table_metadata_no_snapshot_v1_rest_json: Dict[str, Any] ) -> None: diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index 1ca8fd16d2..0b869d6826 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -21,7 +21,7 @@ import pyarrow as pa import pytest -from pytest import TempPathFactory +from pydantic_core import ValidationError from pytest_lazyfixture import lazy_fixture from sqlalchemy.exc import ArgumentError, IntegrityError @@ -38,7 +38,9 @@ ) from pyiceberg.io import FSSPEC_FILE_IO, PY_IO_IMPL from pyiceberg.io.pyarrow import schema_to_pyarrow +from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC from pyiceberg.schema import Schema +from pyiceberg.table import _dataframe_to_data_files from pyiceberg.table.snapshots import Operation from pyiceberg.table.sorting import ( NullOrder, @@ -50,11 +52,6 @@ from pyiceberg.types import IntegerType -@pytest.fixture(name="warehouse", scope="session") -def fixture_warehouse(tmp_path_factory: TempPathFactory) -> Path: - return tmp_path_factory.mktemp("test_sql") - - @pytest.fixture(name="random_identifier") def fixture_random_identifier(warehouse: Path, database_name: str, table_name: str) -> Identifier: os.makedirs(f"{warehouse}/{database_name}.db/{table_name}/metadata/", exist_ok=True) @@ -158,6 +155,24 @@ def test_create_table_default_sort_order(catalog: SqlCatalog, table_schema_neste catalog.drop_table(random_identifier) +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_create_v1_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: + database_name, _table_name = random_identifier + catalog.create_namespace(database_name) + table = catalog.create_table(random_identifier, table_schema_nested, properties={"format-version": "1"}) + assert table.sort_order().order_id == 0, "Order ID must match" + assert table.sort_order().is_unsorted is True, "Order must be unsorted" + assert table.format_version == 1 + assert table.spec() == UNPARTITIONED_PARTITION_SPEC + catalog.drop_table(random_identifier) + + @pytest.mark.parametrize( 'catalog', [ @@ -234,6 +249,23 @@ def test_create_duplicated_table(catalog: SqlCatalog, table_schema_nested: Schem catalog.create_table(random_identifier, table_schema_nested) +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_create_table_if_not_exists_duplicated_table( + catalog: SqlCatalog, table_schema_nested: Schema, 
random_identifier: Identifier +) -> None: + database_name, _table_name = random_identifier + catalog.create_namespace(database_name) + table1 = catalog.create_table(random_identifier, table_schema_nested) + table2 = catalog.create_table_if_not_exists(random_identifier, table_schema_nested) + assert table1.identifier == table2.identifier + + @pytest.mark.parametrize( 'catalog', [ @@ -609,7 +641,7 @@ def test_create_namespace_with_null_properties(catalog: SqlCatalog, database_nam catalog.create_namespace(namespace=database_name, properties={None: "value"}) # type: ignore with pytest.raises(IntegrityError): - catalog.create_namespace(namespace=database_name, properties={"key": None}) # type: ignore + catalog.create_namespace(namespace=database_name, properties={"key": None}) @pytest.mark.parametrize( @@ -696,6 +728,18 @@ def test_load_empty_namespace_properties(catalog: SqlCatalog, database_name: str assert listed_properties == {"exists": "true"} +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +def test_load_namespace_properties_non_existing_namespace(catalog: SqlCatalog) -> None: + with pytest.raises(NoSuchNamespaceError): + catalog.load_namespace_properties("does_not_exist") + + @pytest.mark.parametrize( 'catalog', [ @@ -821,3 +865,90 @@ def test_concurrent_commit_table(catalog: SqlCatalog, table_schema_simple: Schem # This one should fail since it already has been updated with table_b.update_schema() as update: update.add_column(path="c", field_type=IntegerType()) + + +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + lazy_fixture('catalog_sqlite_without_rowcount'), + ], +) +@pytest.mark.parametrize("format_version", [1, 2]) +def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None: + identifier = f"default.arrow_write_data_and_evolve_schema_v{format_version}" + + try: + catalog.create_namespace("default") + except NamespaceAlreadyExistsError: + pass + + try: + catalog.drop_table(identifier=identifier) + except NoSuchTableError: + pass + + pa_table = pa.Table.from_pydict( + { + 'foo': ['a', None, 'z'], + }, + schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]), + ) + + tbl = catalog.create_table(identifier=identifier, schema=pa_table.schema, properties={"format-version": str(format_version)}) + + pa_table_with_column = pa.Table.from_pydict( + { + 'foo': ['a', None, 'z'], + 'bar': [19, None, 25], + }, + schema=pa.schema([ + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=True), + ]), + ) + + with tbl.transaction() as txn: + with txn.update_schema() as schema_txn: + schema_txn.union_by_name(pa_table_with_column.schema) + + with txn.update_snapshot().fast_append() as snapshot_update: + for data_file in _dataframe_to_data_files(table_metadata=txn.table_metadata, df=pa_table_with_column, io=tbl.io): + snapshot_update.append_data_file(data_file) + + +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + lazy_fixture('catalog_sqlite_without_rowcount'), + ], +) +def test_table_properties_int_value(catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier) -> None: + # table properties can be set to int, but still serialized to string + database_name, _table_name = random_identifier + catalog.create_namespace(database_name) + property_with_int = {"property_name": 42} + table = catalog.create_table(random_identifier, 
table_schema_simple, properties=property_with_int) + assert isinstance(table.properties["property_name"], str) + + +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + lazy_fixture('catalog_sqlite_without_rowcount'), + ], +) +def test_table_properties_raise_for_none_value( + catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier +) -> None: + database_name, _table_name = random_identifier + catalog.create_namespace(database_name) + property_with_none = {"property_name": None} + with pytest.raises(ValidationError) as exc_info: + _ = catalog.create_table(random_identifier, table_schema_simple, properties=property_with_none) + assert "None type is not a supported value in properties: property_name" in str(exc_info.value) diff --git a/tests/cli/test_console.py b/tests/cli/test_console.py index 1f9f831d21..e48ede046a 100644 --- a/tests/cli/test_console.py +++ b/tests/cli/test_console.py @@ -337,6 +337,22 @@ def test_drop_namespace_does_not_exists(catalog: InMemoryCatalog) -> None: assert result.output == "Namespace does not exist: ('doesnotexist',)\n" +def test_create_namespace(catalog: InMemoryCatalog) -> None: + runner = CliRunner() + result = runner.invoke(run, ["create", "namespace", TEST_TABLE_NAMESPACE]) + assert result.exit_code == 0 + assert result.output == """Created namespace: default\n""" + + +def test_create_namespace_already_exists(catalog: InMemoryCatalog) -> None: + catalog.create_namespace(TEST_TABLE_NAMESPACE) + + runner = CliRunner() + result = runner.invoke(run, ["create", "namespace", TEST_TABLE_NAMESPACE]) + assert result.exit_code == 1 + assert result.output == "Namespace already exists: ('default',)\n" + + def test_rename_table(catalog: InMemoryCatalog) -> None: catalog.create_table( identifier=TEST_TABLE_IDENTIFIER, diff --git a/tests/conftest.py b/tests/conftest.py index d9a8dfdf07..5b488c70f0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -31,6 +31,7 @@ import string import uuid from datetime import datetime +from pathlib import Path from random import choice from tempfile import TemporaryDirectory from typing import ( @@ -40,18 +41,15 @@ Generator, List, Optional, - Union, ) -from urllib.parse import urlparse import boto3 -import pyarrow as pa import pytest -from moto import mock_dynamodb, mock_glue -from moto.server import ThreadedMotoServer # type: ignore +from moto import mock_aws +from pyspark.sql import SparkSession from pyiceberg import schema -from pyiceberg.catalog import Catalog +from pyiceberg.catalog import Catalog, load_catalog from pyiceberg.catalog.noop import NoopCatalog from pyiceberg.expressions import BoundReference from pyiceberg.io import ( @@ -59,8 +57,6 @@ GCS_PROJECT_ID, GCS_TOKEN, GCS_TOKEN_EXPIRES_AT_MS, - OutputFile, - OutputStream, fsspec, load_file_io, ) @@ -87,9 +83,10 @@ from pyiceberg.utils.datetime import datetime_to_millis if TYPE_CHECKING: - from pytest import ExitCode, Session + import pyarrow as pa + from moto.server import ThreadedMotoServer # type: ignore - from pyiceberg.io.pyarrow import PyArrowFile, PyArrowFileIO + from pyiceberg.io.pyarrow import PyArrowFileIO def pytest_collection_modifyitems(items: List[pytest.Item]) -> None: @@ -269,7 +266,9 @@ def table_schema_nested_with_struct_key_map() -> Schema: @pytest.fixture(scope="session") -def pyarrow_schema_simple_without_ids() -> pa.Schema: +def pyarrow_schema_simple_without_ids() -> "pa.Schema": + import pyarrow as pa + return pa.schema([ pa.field('foo', pa.string(), 
nullable=True), pa.field('bar', pa.int32(), nullable=False), @@ -278,7 +277,9 @@ def pyarrow_schema_simple_without_ids() -> pa.Schema: @pytest.fixture(scope="session") -def pyarrow_schema_nested_without_ids() -> pa.Schema: +def pyarrow_schema_nested_without_ids() -> "pa.Schema": + import pyarrow as pa + return pa.schema([ pa.field('foo', pa.string(), nullable=False), pa.field('bar', pa.int32(), nullable=False), @@ -1453,40 +1454,6 @@ def simple_map() -> MapType: return MapType(key_id=19, key_type=StringType(), value_id=25, value_type=DoubleType(), value_required=False) -class LocalOutputFile(OutputFile): - """An OutputFile implementation for local files (for test use only).""" - - def __init__(self, location: str) -> None: - parsed_location = urlparse(location) # Create a ParseResult from the uri - if ( - parsed_location.scheme and parsed_location.scheme != "file" - ): # Validate that an uri is provided with a scheme of `file` - raise ValueError("LocalOutputFile location must have a scheme of `file`") - elif parsed_location.netloc: - raise ValueError(f"Network location is not allowed for LocalOutputFile: {parsed_location.netloc}") - - super().__init__(location=location) - self._path = parsed_location.path - - def __len__(self) -> int: - """Return the length of an instance of the LocalOutputFile class.""" - return os.path.getsize(self._path) - - def exists(self) -> bool: - return os.path.exists(self._path) - - def to_input_file(self) -> "PyArrowFile": - from pyiceberg.io.pyarrow import PyArrowFileIO - - return PyArrowFileIO().new_input(location=self.location) - - def create(self, overwrite: bool = False) -> OutputStream: - output_file = open(self._path, "wb" if overwrite else "xb") - if not issubclass(type(output_file), OutputStream): - raise TypeError("Object returned from LocalOutputFile.create(...) 
does not match the OutputStream protocol.") - return output_file - - @pytest.fixture(scope="session") def generated_manifest_entry_file(avro_schema_manifest_entry: Dict[str, Any]) -> Generator[str, None, None]: from fastavro import parse_schema, writer @@ -1761,53 +1728,45 @@ def fixture_aws_credentials() -> Generator[None, None, None]: os.environ.pop("AWS_DEFAULT_REGION") -MOTO_SERVER = ThreadedMotoServer(ip_address="localhost", port=5001) - - -def pytest_sessionfinish( - session: "Session", - exitstatus: Union[int, "ExitCode"], -) -> None: - if MOTO_SERVER._server_ready: - MOTO_SERVER.stop() - - @pytest.fixture(scope="session") -def moto_server() -> ThreadedMotoServer: - # this will throw an exception if the port is already in use - is_port_in_use(MOTO_SERVER._ip_address, MOTO_SERVER._port) - MOTO_SERVER.start() - return MOTO_SERVER +def moto_server() -> "ThreadedMotoServer": + from moto.server import ThreadedMotoServer + server = ThreadedMotoServer(ip_address="localhost", port=5001) -def is_port_in_use(ip_address: str, port: int) -> None: + # this will throw an exception if the port is already in use with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.bind((ip_address, port)) + s.bind((server._ip_address, server._port)) + + server.start() + yield server + server.stop() @pytest.fixture(scope="session") -def moto_endpoint_url(moto_server: ThreadedMotoServer) -> str: +def moto_endpoint_url(moto_server: "ThreadedMotoServer") -> str: _url = f"http://{moto_server._ip_address}:{moto_server._port}" return _url -@pytest.fixture(name="_s3") +@pytest.fixture(name="_s3", scope="function") def fixture_s3(_aws_credentials: None, moto_endpoint_url: str) -> Generator[boto3.client, None, None]: """Yield a mocked S3 client.""" - yield boto3.client("s3", region_name="us-east-1", endpoint_url=moto_endpoint_url) + with mock_aws(): + yield boto3.client("s3", region_name="us-east-1", endpoint_url=moto_endpoint_url) @pytest.fixture(name="_glue") def fixture_glue(_aws_credentials: None) -> Generator[boto3.client, None, None]: """Yield a mocked glue client.""" - with mock_glue(): + with mock_aws(): yield boto3.client("glue", region_name="us-east-1") @pytest.fixture(name="_dynamodb") def fixture_dynamodb(_aws_credentials: None) -> Generator[boto3.client, None, None]: """Yield a mocked DynamoDB client.""" - with mock_dynamodb(): + with mock_aws(): yield boto3.client("dynamodb", region_name="us-east-1") @@ -1899,7 +1858,8 @@ def get_s3_path(bucket_name: str, database_name: Optional[str] = None, table_nam @pytest.fixture(name="s3", scope="module") def fixture_s3_client() -> boto3.client: - yield boto3.client("s3") + with mock_aws(): + yield boto3.client("s3") def clean_up(test_catalog: Catalog) -> None: @@ -1934,6 +1894,11 @@ def example_task(data_file: str) -> FileScanTask: ) +@pytest.fixture(scope="session") +def warehouse(tmp_path_factory: pytest.TempPathFactory) -> Path: + return tmp_path_factory.mktemp("test_sql") + + @pytest.fixture def table_v1(example_table_metadata_v1: Dict[str, Any]) -> Table: table_metadata = TableMetadataV1(**example_table_metadata_v1) @@ -1961,3 +1926,51 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table: @pytest.fixture def bound_reference_str() -> BoundReference[str]: return BoundReference(field=NestedField(1, "field", StringType(), required=False), accessor=Accessor(position=0, inner=None)) + + +@pytest.fixture(scope="session") +def session_catalog() -> Catalog: + return load_catalog( + "local", + **{ + "type": "rest", + "uri": "http://localhost:8181", + 
"s3.endpoint": "http://localhost:9000", + "s3.access-key-id": "admin", + "s3.secret-access-key": "password", + }, + ) + + +@pytest.fixture(scope="session") +def spark() -> SparkSession: + import importlib.metadata + import os + + spark_version = ".".join(importlib.metadata.version("pyspark").split(".")[:2]) + scala_version = "2.12" + iceberg_version = "1.4.3" + + os.environ["PYSPARK_SUBMIT_ARGS"] = ( + f"--packages org.apache.iceberg:iceberg-spark-runtime-{spark_version}_{scala_version}:{iceberg_version}," + f"org.apache.iceberg:iceberg-aws-bundle:{iceberg_version} pyspark-shell" + ) + os.environ["AWS_REGION"] = "us-east-1" + os.environ["AWS_ACCESS_KEY_ID"] = "admin" + os.environ["AWS_SECRET_ACCESS_KEY"] = "password" + + spark = ( + SparkSession.builder.appName("PyIceberg integration test") + .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") + .config("spark.sql.catalog.integration", "org.apache.iceberg.spark.SparkCatalog") + .config("spark.sql.catalog.integration.catalog-impl", "org.apache.iceberg.rest.RESTCatalog") + .config("spark.sql.catalog.integration.uri", "http://localhost:8181") + .config("spark.sql.catalog.integration.io-impl", "org.apache.iceberg.aws.s3.S3FileIO") + .config("spark.sql.catalog.integration.warehouse", "s3://warehouse/wh/") + .config("spark.sql.catalog.integration.s3.endpoint", "http://localhost:9000") + .config("spark.sql.catalog.integration.s3.path-style-access", "true") + .config("spark.sql.defaultCatalog", "integration") + .getOrCreate() + ) + + return spark diff --git a/tests/integration/test_partition_evolution.py b/tests/integration/test_partition_evolution.py new file mode 100644 index 0000000000..16feef565d --- /dev/null +++ b/tests/integration/test_partition_evolution.py @@ -0,0 +1,490 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint:disable=redefined-outer-name + +import pytest + +from pyiceberg.catalog import Catalog, load_catalog +from pyiceberg.exceptions import NoSuchTableError +from pyiceberg.partitioning import PartitionField, PartitionSpec +from pyiceberg.schema import Schema +from pyiceberg.table import Table +from pyiceberg.transforms import ( + BucketTransform, + DayTransform, + HourTransform, + IdentityTransform, + MonthTransform, + TruncateTransform, + VoidTransform, + YearTransform, +) +from pyiceberg.types import ( + LongType, + NestedField, + StringType, + TimestampType, +) + + +@pytest.fixture() +def catalog_rest() -> Catalog: + return load_catalog( + "local", + **{ + "type": "rest", + "uri": "http://localhost:8181", + "s3.endpoint": "http://localhost:9000", + "s3.access-key-id": "admin", + "s3.secret-access-key": "password", + }, + ) + + +@pytest.fixture() +def catalog_hive() -> Catalog: + return load_catalog( + "local", + **{ + "type": "hive", + "uri": "http://localhost:9083", + "s3.endpoint": "http://localhost:9000", + "s3.access-key-id": "admin", + "s3.secret-access-key": "password", + }, + ) + + +def _simple_table(catalog: Catalog, table_schema_simple: Schema) -> Table: + return _create_table_with_schema(catalog, table_schema_simple, "1") + + +def _table(catalog: Catalog) -> Table: + schema_with_timestamp = Schema( + NestedField(1, "id", LongType(), required=False), + NestedField(2, "event_ts", TimestampType(), required=False), + NestedField(3, "str", StringType(), required=False), + ) + return _create_table_with_schema(catalog, schema_with_timestamp, "1") + + +def _table_v2(catalog: Catalog) -> Table: + schema_with_timestamp = Schema( + NestedField(1, "id", LongType(), required=False), + NestedField(2, "event_ts", TimestampType(), required=False), + NestedField(3, "str", StringType(), required=False), + ) + return _create_table_with_schema(catalog, schema_with_timestamp, "2") + + +def _create_table_with_schema(catalog: Catalog, schema: Schema, format_version: str) -> Table: + tbl_name = "default.test_schema_evolution" + try: + catalog.drop_table(tbl_name) + except NoSuchTableError: + pass + return catalog.create_table(identifier=tbl_name, schema=schema, properties={"format-version": format_version}) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_add_identity_partition(catalog: Catalog, table_schema_simple: Schema) -> None: + simple_table = _simple_table(catalog, table_schema_simple) + simple_table.update_spec().add_identity("foo").commit() + specs = simple_table.specs() + assert len(specs) == 2 + spec = simple_table.spec() + assert spec.spec_id == 1 + assert spec.last_assigned_field_id == 1000 + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_add_year(catalog: Catalog) -> None: + table = _table(catalog) + table.update_spec().add_field("event_ts", YearTransform(), "year_transform").commit() + _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, YearTransform(), "year_transform")) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_add_month(catalog: Catalog) -> None: + table = _table(catalog) + table.update_spec().add_field("event_ts", MonthTransform(), "month_transform").commit() + _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, 
MonthTransform(), "month_transform")) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_add_day(catalog: Catalog) -> None: + table = _table(catalog) + table.update_spec().add_field("event_ts", DayTransform(), "day_transform").commit() + _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, DayTransform(), "day_transform")) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_add_hour(catalog: Catalog) -> None: + table = _table(catalog) + table.update_spec().add_field("event_ts", HourTransform(), "hour_transform").commit() + _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, HourTransform(), "hour_transform")) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_add_bucket(catalog: Catalog, table_schema_simple: Schema) -> None: + simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") + simple_table.update_spec().add_field("foo", BucketTransform(12), "bucket_transform").commit() + _validate_new_partition_fields(simple_table, 1000, 1, 1000, PartitionField(1, 1000, BucketTransform(12), "bucket_transform")) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_add_truncate(catalog: Catalog, table_schema_simple: Schema) -> None: + simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") + simple_table.update_spec().add_field("foo", TruncateTransform(1), "truncate_transform").commit() + _validate_new_partition_fields( + simple_table, 1000, 1, 1000, PartitionField(1, 1000, TruncateTransform(1), "truncate_transform") + ) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_multiple_adds(catalog: Catalog) -> None: + table = _table(catalog) + table.update_spec().add_identity("id").add_field("event_ts", HourTransform(), "hourly_partitioned").add_field( + "str", TruncateTransform(2), "truncate_str" + ).commit() + _validate_new_partition_fields( + table, + 1002, + 1, + 1002, + PartitionField(1, 1000, IdentityTransform(), "id"), + PartitionField(2, 1001, HourTransform(), "hourly_partitioned"), + PartitionField(3, 1002, TruncateTransform(2), "truncate_str"), + ) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_add_hour_to_day(catalog: Catalog) -> None: + table = _table(catalog) + table.update_spec().add_field("event_ts", DayTransform(), "daily_partitioned").commit() + table.update_spec().add_field("event_ts", HourTransform(), "hourly_partitioned").commit() + _validate_new_partition_fields( + table, + 1001, + 2, + 1001, + PartitionField(2, 1000, DayTransform(), "daily_partitioned"), + PartitionField(2, 1001, HourTransform(), "hourly_partitioned"), + ) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_add_multiple_buckets(catalog: Catalog) -> None: + table = _table(catalog) + table.update_spec().add_field("id", BucketTransform(16)).add_field("id", BucketTransform(4)).commit() + _validate_new_partition_fields( + table, + 
1001, + 1, + 1001, + PartitionField(1, 1000, BucketTransform(16), "id_bucket_16"), + PartitionField(1, 1001, BucketTransform(4), "id_bucket_4"), + ) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_remove_identity(catalog: Catalog) -> None: + table = _table(catalog) + table.update_spec().add_identity("id").commit() + table.update_spec().remove_field("id").commit() + assert len(table.specs()) == 3 + assert table.spec().spec_id == 2 + assert table.spec() == PartitionSpec( + PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name='id'), spec_id=2 + ) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_remove_identity_v2(catalog: Catalog) -> None: + table_v2 = _table_v2(catalog) + table_v2.update_spec().add_identity("id").commit() + table_v2.update_spec().remove_field("id").commit() + assert len(table_v2.specs()) == 2 + assert table_v2.spec().spec_id == 0 + assert table_v2.spec() == PartitionSpec(spec_id=0) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_remove_bucket(catalog: Catalog) -> None: + table = _table(catalog) + with table.update_spec() as update: + update.add_field("id", BucketTransform(16), "bucketed_id") + update.add_field("event_ts", DayTransform(), "day_ts") + with table.update_spec() as remove: + remove.remove_field("bucketed_id") + + assert len(table.specs()) == 3 + _validate_new_partition_fields( + table, + 1001, + 2, + 1001, + PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name='bucketed_id'), + PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name='day_ts'), + ) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_remove_bucket_v2(catalog: Catalog) -> None: + table_v2 = _table_v2(catalog) + with table_v2.update_spec() as update: + update.add_field("id", BucketTransform(16), "bucketed_id") + update.add_field("event_ts", DayTransform(), "day_ts") + with table_v2.update_spec() as remove: + remove.remove_field("bucketed_id") + assert len(table_v2.specs()) == 3 + _validate_new_partition_fields( + table_v2, 1001, 2, 1001, PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name='day_ts') + ) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_remove_day(catalog: Catalog) -> None: + table = _table(catalog) + with table.update_spec() as update: + update.add_field("id", BucketTransform(16), "bucketed_id") + update.add_field("event_ts", DayTransform(), "day_ts") + with table.update_spec() as remove: + remove.remove_field("day_ts") + + assert len(table.specs()) == 3 + _validate_new_partition_fields( + table, + 1001, + 2, + 1001, + PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name='bucketed_id'), + PartitionField(source_id=2, field_id=1001, transform=VoidTransform(), name='day_ts'), + ) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_remove_day_v2(catalog: Catalog) -> None: + table_v2 = _table_v2(catalog) + with table_v2.update_spec() as update: + update.add_field("id", 
BucketTransform(16), "bucketed_id") + update.add_field("event_ts", DayTransform(), "day_ts") + with table_v2.update_spec() as remove: + remove.remove_field("day_ts") + assert len(table_v2.specs()) == 3 + _validate_new_partition_fields( + table_v2, 1000, 2, 1001, PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name='bucketed_id') + ) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_rename(catalog: Catalog) -> None: + table = _table(catalog) + table.update_spec().add_identity("id").commit() + table.update_spec().rename_field("id", "sharded_id").commit() + assert len(table.specs()) == 3 + assert table.spec().spec_id == 2 + _validate_new_partition_fields(table, 1000, 2, 1000, PartitionField(1, 1000, IdentityTransform(), "sharded_id")) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_cannot_add_and_remove(catalog: Catalog) -> None: + table = _table(catalog) + with pytest.raises(ValueError) as exc_info: + table.update_spec().add_identity("id").remove_field("id").commit() + assert "Cannot delete newly added field id" in str(exc_info.value) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_cannot_add_redundant_time_partition(catalog: Catalog) -> None: + table = _table(catalog) + with pytest.raises(ValueError) as exc_info: + table.update_spec().add_field("event_ts", YearTransform(), "year_transform").add_field( + "event_ts", HourTransform(), "hour_transform" + ).commit() + assert "Cannot add time partition field: hour_transform conflicts with year_transform" in str(exc_info.value) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_cannot_delete_and_rename(catalog: Catalog) -> None: + table = _table(catalog) + with pytest.raises(ValueError) as exc_info: + table.update_spec().add_identity("id").commit() + table.update_spec().remove_field("id").rename_field("id", "sharded_id").commit() + assert "Cannot delete and rename partition field id" in str(exc_info.value) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_cannot_rename_and_delete(catalog: Catalog) -> None: + table = _table(catalog) + with pytest.raises(ValueError) as exc_info: + table.update_spec().add_identity("id").commit() + table.update_spec().rename_field("id", "sharded_id").remove_field("id").commit() + assert "Cannot rename and delete field id" in str(exc_info.value) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_cannot_add_same_tranform_for_same_field(catalog: Catalog) -> None: + table = _table(catalog) + with pytest.raises(ValueError) as exc_info: + table.update_spec().add_field("str", TruncateTransform(4), "truncated_str").add_field( + "str", TruncateTransform(4) + ).commit() + assert "Already added partition" in str(exc_info.value) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_cannot_add_same_field_multiple_times(catalog: Catalog) -> None: + table = _table(catalog) + with 
pytest.raises(ValueError) as exc_info: + table.update_spec().add_field("id", IdentityTransform(), "duplicate").add_field( + "id", IdentityTransform(), "duplicate" + ).commit() + assert "Already added partition" in str(exc_info.value) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_cannot_add_multiple_specs_same_name(catalog: Catalog) -> None: + table = _table(catalog) + with pytest.raises(ValueError) as exc_info: + table.update_spec().add_field("id", IdentityTransform(), "duplicate").add_field( + "event_ts", IdentityTransform(), "duplicate" + ).commit() + assert "Already added partition" in str(exc_info.value) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_change_specs_and_schema_transaction(catalog: Catalog) -> None: + table = _table(catalog) + with table.transaction() as transaction: + with transaction.update_spec() as update_spec: + update_spec.add_identity("id").add_field("event_ts", HourTransform(), "hourly_partitioned").add_field( + "str", TruncateTransform(2), "truncate_str" + ) + + with transaction.update_schema() as update_schema: + update_schema.add_column("col_string", StringType()) + + _validate_new_partition_fields( + table, + 1002, + 1, + 1002, + PartitionField(1, 1000, IdentityTransform(), "id"), + PartitionField(2, 1001, HourTransform(), "hourly_partitioned"), + PartitionField(3, 1002, TruncateTransform(2), "truncate_str"), + ) + + assert table.schema() == Schema( + NestedField(field_id=1, name='id', field_type=LongType(), required=False), + NestedField(field_id=2, name='event_ts', field_type=TimestampType(), required=False), + NestedField(field_id=3, name='str', field_type=StringType(), required=False), + NestedField(field_id=4, name='col_string', field_type=StringType(), required=False), + schema_id=1, + identifier_field_ids=[], + ) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_multiple_adds_and_remove_v1(catalog: Catalog) -> None: + table = _table(catalog) + with table.update_spec() as update: + update.add_field("id", BucketTransform(16), "bucketed_id") + update.add_field("event_ts", DayTransform(), "day_ts") + with table.update_spec() as update: + update.remove_field("day_ts").remove_field("bucketed_id") + with table.update_spec() as update: + update.add_field("str", TruncateTransform(2), "truncated_str") + _validate_new_partition_fields( + table, + 1002, + 3, + 1002, + PartitionField(1, 1000, VoidTransform(), "bucketed_id"), + PartitionField(2, 1001, VoidTransform(), "day_ts"), + PartitionField(3, 1002, TruncateTransform(2), "truncated_str"), + ) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_multiple_adds_and_remove_v2(catalog: Catalog) -> None: + table_v2 = _table_v2(catalog) + with table_v2.update_spec() as update: + update.add_field("id", BucketTransform(16), "bucketed_id") + update.add_field("event_ts", DayTransform(), "day_ts") + with table_v2.update_spec() as update: + update.remove_field("day_ts").remove_field("bucketed_id") + with table_v2.update_spec() as update: + update.add_field("str", TruncateTransform(2), "truncated_str") + _validate_new_partition_fields(table_v2, 1002, 2, 1002, PartitionField(3, 1002, TruncateTransform(2), 
"truncated_str")) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_multiple_remove_and_add_reuses_v2(catalog: Catalog) -> None: + table_v2 = _table_v2(catalog) + with table_v2.update_spec() as update: + update.add_field("id", BucketTransform(16), "bucketed_id") + update.add_field("event_ts", DayTransform(), "day_ts") + with table_v2.update_spec() as update: + update.remove_field("day_ts").remove_field("bucketed_id") + with table_v2.update_spec() as update: + update.add_field("id", BucketTransform(16), "bucketed_id") + _validate_new_partition_fields(table_v2, 1000, 2, 1001, PartitionField(1, 1000, BucketTransform(16), "bucketed_id")) + + +def _validate_new_partition_fields( + table: Table, + expected_spec_last_assigned_field_id: int, + expected_spec_id: int, + expected_metadata_last_assigned_field_id: int, + *expected_partition_fields: PartitionField, +) -> None: + spec = table.spec() + assert spec.spec_id == expected_spec_id + assert spec.last_assigned_field_id == expected_spec_last_assigned_field_id + assert table.last_partition_id() == expected_metadata_last_assigned_field_id + assert len(spec.fields) == len(expected_partition_fields) + for i in range(len(spec.fields)): + assert spec.fields[i] == expected_partition_fields[i] diff --git a/tests/integration/test_partitioning_key.py b/tests/integration/test_partitioning_key.py new file mode 100644 index 0000000000..12056bac1e --- /dev/null +++ b/tests/integration/test_partitioning_key.py @@ -0,0 +1,772 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint:disable=redefined-outer-name +import uuid +from datetime import date, datetime, timedelta, timezone +from decimal import Decimal +from typing import Any, List + +import pytest +from pyspark.sql import SparkSession +from pyspark.sql.utils import AnalysisException + +from pyiceberg.catalog import Catalog +from pyiceberg.partitioning import PartitionField, PartitionFieldValue, PartitionKey, PartitionSpec +from pyiceberg.schema import Schema +from pyiceberg.transforms import ( + BucketTransform, + DayTransform, + HourTransform, + IdentityTransform, + MonthTransform, + TruncateTransform, + YearTransform, +) +from pyiceberg.typedef import Record +from pyiceberg.types import ( + BinaryType, + BooleanType, + DateType, + DecimalType, + DoubleType, + FixedType, + FloatType, + IntegerType, + LongType, + NestedField, + StringType, + TimestampType, + TimestamptzType, + UUIDType, +) + +TABLE_SCHEMA = Schema( + NestedField(field_id=1, name="boolean_field", field_type=BooleanType(), required=False), + NestedField(field_id=2, name="string_field", field_type=StringType(), required=False), + NestedField(field_id=3, name="string_long_field", field_type=StringType(), required=False), + NestedField(field_id=4, name="int_field", field_type=IntegerType(), required=False), + NestedField(field_id=5, name="long_field", field_type=LongType(), required=False), + NestedField(field_id=6, name="float_field", field_type=FloatType(), required=False), + NestedField(field_id=7, name="double_field", field_type=DoubleType(), required=False), + NestedField(field_id=8, name="timestamp_field", field_type=TimestampType(), required=False), + NestedField(field_id=9, name="timestamptz_field", field_type=TimestamptzType(), required=False), + NestedField(field_id=10, name="date_field", field_type=DateType(), required=False), + # NestedField(field_id=11, name="time", field_type=TimeType(), required=False), + NestedField(field_id=11, name="binary_field", field_type=BinaryType(), required=False), + NestedField(field_id=12, name="fixed_field", field_type=FixedType(16), required=False), + NestedField(field_id=13, name="decimal_field", field_type=DecimalType(5, 2), required=False), + NestedField(field_id=14, name="uuid_field", field_type=UUIDType(), required=False), +) + + +identifier = "default.test_table" + + +@pytest.mark.parametrize( + "partition_fields, partition_values, expected_partition_record, expected_hive_partition_path_slice, spark_create_table_sql_for_justification, spark_data_insert_sql_for_justification", + [ + # # Identity Transform + ( + [PartitionField(source_id=1, field_id=1001, transform=IdentityTransform(), name="boolean_field")], + [False], + Record(boolean_field=False), + "boolean_field=false", + f"""CREATE TABLE {identifier} ( + boolean_field boolean, + string_field string + ) + USING iceberg + PARTITIONED BY ( + identity(boolean_field) -- Partitioning by 'boolean_field' + ) + """, + f"""INSERT INTO {identifier} + VALUES + (false, 'Boolean field set to false'); + """, + ), + ( + [PartitionField(source_id=2, field_id=1001, transform=IdentityTransform(), name="string_field")], + ["sample_string"], + Record(string_field="sample_string"), + "string_field=sample_string", + f"""CREATE TABLE {identifier} ( + string_field string, + another_string_field string + ) + USING iceberg + PARTITIONED BY ( + identity(string_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + ('sample_string', 'Another string value') + """, + ), + ( + [PartitionField(source_id=4, field_id=1001, transform=IdentityTransform(), 
name="int_field")], + [42], + Record(int_field=42), + "int_field=42", + f"""CREATE TABLE {identifier} ( + int_field int, + string_field string + ) + USING iceberg + PARTITIONED BY ( + identity(int_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + (42, 'Associated string value for int 42') + """, + ), + ( + [PartitionField(source_id=5, field_id=1001, transform=IdentityTransform(), name="long_field")], + [1234567890123456789], + Record(long_field=1234567890123456789), + "long_field=1234567890123456789", + f"""CREATE TABLE {identifier} ( + long_field bigint, + string_field string + ) + USING iceberg + PARTITIONED BY ( + identity(long_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + (1234567890123456789, 'Associated string value for long 1234567890123456789') + """, + ), + ( + [PartitionField(source_id=6, field_id=1001, transform=IdentityTransform(), name="float_field")], + [3.14], + Record(float_field=3.14), + "float_field=3.14", + # spark writes differently as pyiceberg, Record[float_field=3.140000104904175], path:float_field=3.14 (Record has difference) + # so justification (compare expected value with spark behavior) would fail. + None, + None, + # f"""CREATE TABLE {identifier} ( + # float_field float, + # string_field string + # ) + # USING iceberg + # PARTITIONED BY ( + # identity(float_field) + # ) + # """, + # f"""INSERT INTO {identifier} + # VALUES + # (3.14, 'Associated string value for float 3.14') + # """ + ), + ( + [PartitionField(source_id=7, field_id=1001, transform=IdentityTransform(), name="double_field")], + [6.282], + Record(double_field=6.282), + "double_field=6.282", + # spark writes differently as pyiceberg, Record[double_field=6.2820000648498535] path:double_field=6.282 (Record has difference) + # so justification (compare expected value with spark behavior) would fail. 
+ None, + None, + # f"""CREATE TABLE {identifier} ( + # double_field double, + # string_field string + # ) + # USING iceberg + # PARTITIONED BY ( + # identity(double_field) + # ) + # """, + # f"""INSERT INTO {identifier} + # VALUES + # (6.282, 'Associated string value for double 6.282') + # """ + ), + ( + [PartitionField(source_id=8, field_id=1001, transform=IdentityTransform(), name="timestamp_field")], + [datetime(2023, 1, 1, 12, 0, 1, 999)], + Record(timestamp_field=1672574401000999), + "timestamp_field=2023-01-01T12%3A00%3A01.000999", + f"""CREATE TABLE {identifier} ( + timestamp_field timestamp_ntz, + string_field string + ) + USING iceberg + PARTITIONED BY ( + identity(timestamp_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01 12:00:01.000999' AS TIMESTAMP_NTZ), 'Associated string value for timestamp 2023-01-01T12:00:00') + """, + ), + ( + [PartitionField(source_id=8, field_id=1001, transform=IdentityTransform(), name="timestamp_field")], + [datetime(2023, 1, 1, 12, 0, 1)], + Record(timestamp_field=1672574401000000), + "timestamp_field=2023-01-01T12%3A00%3A01", + f"""CREATE TABLE {identifier} ( + timestamp_field timestamp_ntz, + string_field string + ) + USING iceberg + PARTITIONED BY ( + identity(timestamp_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01 12:00:01' AS TIMESTAMP_NTZ), 'Associated string value for timestamp 2023-01-01T12:00:00') + """, + ), + ( + [PartitionField(source_id=8, field_id=1001, transform=IdentityTransform(), name="timestamp_field")], + [datetime(2023, 1, 1, 12, 0, 0)], + Record(timestamp_field=1672574400000000), + "timestamp_field=2023-01-01T12%3A00%3A00", + # Spark writes differently as pyiceberg, so justification (compare expected value with spark behavior) would fail + # AssertionError: assert 'timestamp_field=2023-01-01T12%3A00%3A00' in 's3://warehouse/default/test_table/data/timestamp_field=2023-01-01T12%3A00/00000-5-f9dca69a-9fb7-4830-9ef6-62d3d7afc09e-00001.parquet' + # TLDR: CAST('2023-01-01 12:00:00' AS TIMESTAMP_NTZ) becomes 2023-01-01T12:00 in the hive partition path when spark writes it (without the seconds). + None, + None, + # f"""CREATE TABLE {identifier} ( + # timestamp_field timestamp_ntz, + # string_field string + # ) + # USING iceberg + # PARTITIONED BY ( + # identity(timestamp_field) + # ) + # """, + # f"""INSERT INTO {identifier} + # VALUES + # (CAST('2023-01-01 12:00:00' AS TIMESTAMP_NTZ), 'Associated string value for timestamp 2023-01-01T12:00:00') + # """ + ), + ( + [PartitionField(source_id=9, field_id=1001, transform=IdentityTransform(), name="timestamptz_field")], + [datetime(2023, 1, 1, 12, 0, 1, 999, tzinfo=timezone(timedelta(hours=3)))], + Record(timestamptz_field=1672563601000999), + "timestamptz_field=2023-01-01T09%3A00%3A01.000999%2B00%3A00", + # Spark writes differently as pyiceberg, so justification (compare expected value with spark behavior) would fail + # AssertionError: assert 'timestamptz_field=2023-01-01T09%3A00%3A01.000999%2B00%3A00' in 's3://warehouse/default/test_table/data/timestamptz_field=2023-01-01T09%3A00%3A01.000999Z/00000-5-b710fc4d-66b6-47f1-b8ae-6208f8aaa2d4-00001.parquet' + # TLDR: CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP) becomes 2023-01-01T09:00:01.000999Z in the hive partition path when spark writes it (while iceberg: timestamptz_field=2023-01-01T09:00:01.000999+00:00). 
+ None, + None, + # f"""CREATE TABLE {identifier} ( + # timestamptz_field timestamp, + # string_field string + # ) + # USING iceberg + # PARTITIONED BY ( + # identity(timestamptz_field) + # ) + # """, + # f"""INSERT INTO {identifier} + # VALUES + # (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Associated string value for timestamp 2023-01-01 12:00:01.000999+03:00') + # """ + ), + ( + [PartitionField(source_id=10, field_id=1001, transform=IdentityTransform(), name="date_field")], + [date(2023, 1, 1)], + Record(date_field=19358), + "date_field=2023-01-01", + f"""CREATE TABLE {identifier} ( + date_field date, + string_field string + ) + USING iceberg + PARTITIONED BY ( + identity(date_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01' AS DATE), 'Associated string value for date 2023-01-01') + """, + ), + ( + [PartitionField(source_id=14, field_id=1001, transform=IdentityTransform(), name="uuid_field")], + [uuid.UUID("f47ac10b-58cc-4372-a567-0e02b2c3d479")], + Record(uuid_field="f47ac10b-58cc-4372-a567-0e02b2c3d479"), + "uuid_field=f47ac10b-58cc-4372-a567-0e02b2c3d479", + f"""CREATE TABLE {identifier} ( + uuid_field string, + string_field string + ) + USING iceberg + PARTITIONED BY ( + identity(uuid_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + ('f47ac10b-58cc-4372-a567-0e02b2c3d479', 'Associated string value for UUID f47ac10b-58cc-4372-a567-0e02b2c3d479') + """, + ), + ( + [PartitionField(source_id=11, field_id=1001, transform=IdentityTransform(), name="binary_field")], + [b'example'], + Record(binary_field=b'example'), + "binary_field=ZXhhbXBsZQ%3D%3D", + f"""CREATE TABLE {identifier} ( + binary_field binary, + string_field string + ) + USING iceberg + PARTITIONED BY ( + identity(binary_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('example' AS BINARY), 'Associated string value for binary `example`') + """, + ), + ( + [PartitionField(source_id=13, field_id=1001, transform=IdentityTransform(), name="decimal_field")], + [Decimal('123.45')], + Record(decimal_field=Decimal('123.45')), + "decimal_field=123.45", + f"""CREATE TABLE {identifier} ( + decimal_field decimal(5,2), + string_field string + ) + USING iceberg + PARTITIONED BY ( + identity(decimal_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + (123.45, 'Associated string value for decimal 123.45') + """, + ), + # # Year Month Day Hour Transform + # Month Transform + ( + [PartitionField(source_id=8, field_id=1001, transform=MonthTransform(), name="timestamp_field_month")], + [datetime(2023, 1, 1, 11, 55, 59, 999999)], + Record(timestamp_field_month=((2023 - 1970) * 12)), + "timestamp_field_month=2023-01", + f"""CREATE TABLE {identifier} ( + timestamp_field timestamp_ntz, + string_field string + ) + USING iceberg + PARTITIONED BY ( + month(timestamp_field) -- Partitioning by month from 'timestamp_field' + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP_NTZ), 'Event at 2023-01-01 11:55:59.999999'); + """, + ), + ( + [PartitionField(source_id=9, field_id=1001, transform=MonthTransform(), name="timestamptz_field_month")], + [datetime(2023, 1, 1, 12, 0, 1, 999, tzinfo=timezone(timedelta(hours=3)))], + Record(timestamptz_field_month=((2023 - 1970) * 12 + 1 - 1)), + "timestamptz_field_month=2023-01", + f"""CREATE TABLE {identifier} ( + timestamptz_field timestamp, + string_field string + ) + USING iceberg + PARTITIONED BY ( + month(timestamptz_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + 
(CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Event at 2023-01-01 12:00:01.000999+03:00'); + """, + ), + ( + [PartitionField(source_id=10, field_id=1001, transform=MonthTransform(), name="date_field_month")], + [date(2023, 1, 1)], + Record(date_field_month=((2023 - 1970) * 12)), + "date_field_month=2023-01", + f"""CREATE TABLE {identifier} ( + date_field date, + string_field string + ) + USING iceberg + PARTITIONED BY ( + month(date_field) -- Partitioning by month from 'date_field' + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01' AS DATE), 'Event on 2023-01-01'); + """, + ), + # Year Transform + ( + [PartitionField(source_id=8, field_id=1001, transform=YearTransform(), name="timestamp_field_year")], + [datetime(2023, 1, 1, 11, 55, 59, 999999)], + Record(timestamp_field_year=(2023 - 1970)), + "timestamp_field_year=2023", + f"""CREATE TABLE {identifier} ( + timestamp_field timestamp, + string_field string + ) + USING iceberg + PARTITIONED BY ( + year(timestamp_field) -- Partitioning by year from 'timestamp_field' + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP), 'Event at 2023-01-01 11:55:59.999999'); + """, + ), + ( + [PartitionField(source_id=9, field_id=1001, transform=YearTransform(), name="timestamptz_field_year")], + [datetime(2023, 1, 1, 12, 0, 1, 999, tzinfo=timezone(timedelta(hours=3)))], + Record(timestamptz_field_year=53), + "timestamptz_field_year=2023", + f"""CREATE TABLE {identifier} ( + timestamptz_field timestamp, + string_field string + ) + USING iceberg + PARTITIONED BY ( + year(timestamptz_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Event at 2023-01-01 12:00:01.000999+03:00'); + """, + ), + ( + [PartitionField(source_id=10, field_id=1001, transform=YearTransform(), name="date_field_year")], + [date(2023, 1, 1)], + Record(date_field_year=(2023 - 1970)), + "date_field_year=2023", + f"""CREATE TABLE {identifier} ( + date_field date, + string_field string + ) + USING iceberg + PARTITIONED BY ( + year(date_field) -- Partitioning by year from 'date_field' + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01' AS DATE), 'Event on 2023-01-01'); + """, + ), + # # Day Transform + ( + [PartitionField(source_id=8, field_id=1001, transform=DayTransform(), name="timestamp_field_day")], + [datetime(2023, 1, 1, 11, 55, 59, 999999)], + Record(timestamp_field_day=19358), + "timestamp_field_day=2023-01-01", + f"""CREATE TABLE {identifier} ( + timestamp_field timestamp, + string_field string + ) + USING iceberg + PARTITIONED BY ( + day(timestamp_field) -- Partitioning by day from 'timestamp_field' + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01' AS DATE), 'Event on 2023-01-01'); + """, + ), + ( + [PartitionField(source_id=9, field_id=1001, transform=DayTransform(), name="timestamptz_field_day")], + [datetime(2023, 1, 1, 12, 0, 1, 999, tzinfo=timezone(timedelta(hours=3)))], + Record(timestamptz_field_day=19358), + "timestamptz_field_day=2023-01-01", + f"""CREATE TABLE {identifier} ( + timestamptz_field timestamp, + string_field string + ) + USING iceberg + PARTITIONED BY ( + day(timestamptz_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Event at 2023-01-01 12:00:01.000999+03:00'); + """, + ), + ( + [PartitionField(source_id=10, field_id=1001, transform=DayTransform(), name="date_field_day")], + [date(2023, 1, 1)], + 
Record(date_field_day=19358), + "date_field_day=2023-01-01", + f"""CREATE TABLE {identifier} ( + date_field date, + string_field string + ) + USING iceberg + PARTITIONED BY ( + day(date_field) -- Partitioning by day from 'date_field' + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01' AS DATE), 'Event on 2023-01-01'); + """, + ), + # Hour Transform + ( + [PartitionField(source_id=8, field_id=1001, transform=HourTransform(), name="timestamp_field_hour")], + [datetime(2023, 1, 1, 11, 55, 59, 999999)], + Record(timestamp_field_hour=464603), + "timestamp_field_hour=2023-01-01-11", + f"""CREATE TABLE {identifier} ( + timestamp_field timestamp, + string_field string + ) + USING iceberg + PARTITIONED BY ( + hour(timestamp_field) -- Partitioning by hour from 'timestamp_field' + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP), 'Event within the 11th hour of 2023-01-01'); + """, + ), + ( + [PartitionField(source_id=9, field_id=1001, transform=HourTransform(), name="timestamptz_field_hour")], + [datetime(2023, 1, 1, 12, 0, 1, 999, tzinfo=timezone(timedelta(hours=3)))], + Record(timestamptz_field_hour=464601), + "timestamptz_field_hour=2023-01-01-09", + f"""CREATE TABLE {identifier} ( + timestamptz_field timestamp, + string_field string + ) + USING iceberg + PARTITIONED BY ( + hour(timestamptz_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP), 'Event at 2023-01-01 12:00:01.000999+03:00'); + """, + ), + # Truncate Transform + ( + [PartitionField(source_id=4, field_id=1001, transform=TruncateTransform(10), name="int_field_trunc")], + [12345], + Record(int_field_trunc=12340), + "int_field_trunc=12340", + f"""CREATE TABLE {identifier} ( + int_field int, + string_field string + ) + USING iceberg + PARTITIONED BY ( + truncate(int_field, 10) -- Truncating 'int_field' integer column to a width of 10 + ) + """, + f"""INSERT INTO {identifier} + VALUES + (12345, 'Sample data for int'); + """, + ), + ( + [PartitionField(source_id=5, field_id=1001, transform=TruncateTransform(2), name="bigint_field_trunc")], + [2**32 + 1], + Record(bigint_field_trunc=2**32), # 4294967296 + "bigint_field_trunc=4294967296", + f"""CREATE TABLE {identifier} ( + bigint_field bigint, + string_field string + ) + USING iceberg + PARTITIONED BY ( + truncate(bigint_field, 2) -- Truncating 'bigint_field' long column to a width of 2 + ) + """, + f"""INSERT INTO {identifier} + VALUES + (4294967297, 'Sample data for long'); + """, + ), + ( + [PartitionField(source_id=2, field_id=1001, transform=TruncateTransform(3), name="string_field_trunc")], + ["abcdefg"], + Record(string_field_trunc="abc"), + "string_field_trunc=abc", + f"""CREATE TABLE {identifier} ( + string_field string, + another_string_field string + ) + USING iceberg + PARTITIONED BY ( + truncate(string_field, 3) -- Truncating 'string_field' string column to a length of 3 characters + ) + """, + f"""INSERT INTO {identifier} + VALUES + ('abcdefg', 'Another sample for string'); + """, + ), + ( + [PartitionField(source_id=13, field_id=1001, transform=TruncateTransform(width=5), name="decimal_field_trunc")], + [Decimal('678.93')], + Record(decimal_field_trunc=Decimal('678.90')), + "decimal_field_trunc=678.90", # Assuming truncation width of 1 leads to truncating to 670 + f"""CREATE TABLE {identifier} ( + decimal_field decimal(5,2), + string_field string + ) + USING iceberg + PARTITIONED BY ( + truncate(decimal_field, 2) + ) + """, + f"""INSERT INTO 
{identifier} + VALUES + (678.90, 'Associated string value for decimal 678.90') + """, + ), + ( + [PartitionField(source_id=11, field_id=1001, transform=TruncateTransform(10), name="binary_field_trunc")], + [b'HELLOICEBERG'], + Record(binary_field_trunc=b'HELLOICEBE'), + "binary_field_trunc=SEVMTE9JQ0VCRQ%3D%3D", + f"""CREATE TABLE {identifier} ( + binary_field binary, + string_field string + ) + USING iceberg + PARTITIONED BY ( + truncate(binary_field, 10) -- Truncating 'binary_field' binary column to a length of 10 bytes + ) + """, + f"""INSERT INTO {identifier} + VALUES + (binary('HELLOICEBERG'), 'Sample data for binary'); + """, + ), + # Bucket Transform + ( + [PartitionField(source_id=4, field_id=1001, transform=BucketTransform(2), name="int_field_bucket")], + [10], + Record(int_field_bucket=0), + "int_field_bucket=0", + f"""CREATE TABLE {identifier} ( + int_field int, + string_field string + ) + USING iceberg + PARTITIONED BY ( + bucket(2, int_field) -- Distributing 'int_field' across 2 buckets + ) + """, + f"""INSERT INTO {identifier} + VALUES + (10, 'Integer with value 10'); + """, + ), + # Test multiple field combinations could generate the Partition record and hive partition path correctly + ( + [ + PartitionField(source_id=8, field_id=1001, transform=YearTransform(), name="timestamp_field_year"), + PartitionField(source_id=10, field_id=1002, transform=DayTransform(), name="date_field_day"), + ], + [ + datetime(2023, 1, 1, 11, 55, 59, 999999), + date(2023, 1, 1), + ], + Record(timestamp_field_year=53, date_field_day=19358), + "timestamp_field_year=2023/date_field_day=2023-01-01", + f"""CREATE TABLE {identifier} ( + timestamp_field timestamp, + date_field date, + string_field string + ) + USING iceberg + PARTITIONED BY ( + year(timestamp_field), + day(date_field) + ) + """, + f"""INSERT INTO {identifier} + VALUES + (CAST('2023-01-01 11:55:59.999999' AS TIMESTAMP), CAST('2023-01-01' AS DATE), 'some data'); + """, + ), + ], +) +@pytest.mark.integration +def test_partition_key( + session_catalog: Catalog, + spark: SparkSession, + partition_fields: List[PartitionField], + partition_values: List[Any], + expected_partition_record: Record, + expected_hive_partition_path_slice: str, + spark_create_table_sql_for_justification: str, + spark_data_insert_sql_for_justification: str, +) -> None: + partition_field_values = [PartitionFieldValue(field, value) for field, value in zip(partition_fields, partition_values)] + spec = PartitionSpec(*partition_fields) + + key = PartitionKey( + raw_partition_field_values=partition_field_values, + partition_spec=spec, + schema=TABLE_SCHEMA, + ) + + # key.partition is used to write the metadata in DataFile, ManifestFile and all above layers + assert key.partition == expected_partition_record + # key.to_path() generates the hive partitioning part of the to-write parquet file path + assert key.to_path() == expected_hive_partition_path_slice + + # Justify expected values are not made up but conform to spark behaviors + if spark_create_table_sql_for_justification is not None and spark_data_insert_sql_for_justification is not None: + try: + spark.sql(f"drop table {identifier}") + except AnalysisException: + pass + + spark.sql(spark_create_table_sql_for_justification) + spark.sql(spark_data_insert_sql_for_justification) + + iceberg_table = session_catalog.load_table(identifier=identifier) + snapshot = iceberg_table.current_snapshot() + assert snapshot + spark_partition_for_justification = ( + 
snapshot.manifests(iceberg_table.io)[0].fetch_manifest_entry(iceberg_table.io)[0].data_file.partition + ) + spark_path_for_justification = ( + snapshot.manifests(iceberg_table.io)[0].fetch_manifest_entry(iceberg_table.io)[0].data_file.file_path + ) + assert spark_partition_for_justification == expected_partition_record + assert expected_hive_partition_path_slice in spark_path_for_justification diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index 63537e3fc4..072fd7db25 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -22,10 +22,12 @@ import pyarrow.parquet as pq import pytest +from hive_metastore.ttypes import LockRequest, LockResponse, LockState, UnlockRequest from pyarrow.fs import S3FileSystem from pyiceberg.catalog import Catalog, load_catalog -from pyiceberg.exceptions import NoSuchTableError +from pyiceberg.catalog.hive import HiveCatalog, _HiveClient +from pyiceberg.exceptions import CommitFailedException, NoSuchTableError from pyiceberg.expressions import ( And, EqualTo, @@ -177,6 +179,28 @@ def test_pyarrow_limit(catalog: Catalog) -> None: assert len(full_result) == 10 +@pytest.mark.integration +@pytest.mark.filterwarnings("ignore") +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_daft_nan(catalog: Catalog) -> None: + table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") + df = table_test_null_nan_rewritten.to_daft() + assert df.count_rows() == 3 + assert math.isnan(df.to_pydict()["col_numeric"][0]) + + +@pytest.mark.integration +@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) +def test_daft_nan_rewritten(catalog: Catalog) -> None: + table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") + df = table_test_null_nan_rewritten.to_daft() + df = df.where(df["col_numeric"].float.is_nan()) + df = df.select("idx", "col_numeric") + assert df.count_rows() == 1 + assert df.to_pydict()["idx"][0] == 1 + assert math.isnan(df.to_pydict()["col_numeric"][0]) + + @pytest.mark.integration @pytest.mark.filterwarnings("ignore") @pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) @@ -223,7 +247,11 @@ def test_ray_all_types(catalog: Catalog) -> None: @pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')]) def test_pyarrow_to_iceberg_all_types(catalog: Catalog) -> None: table_test_all_types = catalog.load_table("default.test_all_types") - fs = S3FileSystem(endpoint_override="http://localhost:9000", access_key="admin", secret_key="password") + fs = S3FileSystem( + endpoint_override=catalog.properties["s3.endpoint"], + access_key=catalog.properties["s3.access-key-id"], + secret_key=catalog.properties["s3.secret-access-key"], + ) data_file_paths = [task.file.file_path for task in table_test_all_types.scan().plan_files()] for data_file_path in data_file_paths: uri = urlparse(data_file_path) @@ -440,3 +468,25 @@ def test_null_list_and_map(catalog: Catalog) -> None: # assert arrow_table["col_list_with_struct"].to_pylist() == [None, [{'test': 1}]] # Once https://github.com/apache/arrow/issues/38809 has been fixed assert arrow_table["col_list_with_struct"].to_pylist() == [[], [{'test': 1}]] + + +@pytest.mark.integration +def test_hive_locking(catalog_hive: HiveCatalog) -> None: + table = create_table(catalog_hive) + + database_name: 
str + table_name: str + _, database_name, table_name = table.identifier + + hive_client: _HiveClient = _HiveClient(catalog_hive.properties["uri"]) + blocking_lock_request: LockRequest = catalog_hive._create_lock_request(database_name, table_name) + + with hive_client as open_client: + # Force a lock on the test table + lock: LockResponse = open_client.lock(blocking_lock_request) + assert lock.state == LockState.ACQUIRED + try: + with pytest.raises(CommitFailedException, match="(Failed to acquire lock for).*"): + table.transaction().set_properties(lock="fail").commit_transaction() + finally: + open_client.unlock(UnlockRequest(lock.lockid)) diff --git a/tests/integration/test_rest_schema.py b/tests/integration/test_rest_schema.py index 5ee8d358b0..4c758e4c3e 100644 --- a/tests/integration/test_rest_schema.py +++ b/tests/integration/test_rest_schema.py @@ -20,8 +20,12 @@ from pyiceberg.catalog import Catalog, load_catalog from pyiceberg.exceptions import CommitFailedException, NoSuchTableError, ValidationError +from pyiceberg.partitioning import PartitionField, PartitionSpec from pyiceberg.schema import Schema, prune_columns -from pyiceberg.table import Table, UpdateSchema +from pyiceberg.table import Table, TableProperties, UpdateSchema +from pyiceberg.table.name_mapping import MappedField, NameMapping, create_mapping_from_schema +from pyiceberg.table.sorting import SortField, SortOrder +from pyiceberg.transforms import IdentityTransform from pyiceberg.types import ( BinaryType, BooleanType, @@ -70,13 +74,17 @@ def _create_table_with_schema(catalog: Catalog, schema: Schema) -> Table: catalog.drop_table(tbl_name) except NoSuchTableError: pass - return catalog.create_table(identifier=tbl_name, schema=schema) + return catalog.create_table( + identifier=tbl_name, + schema=schema, + properties={TableProperties.DEFAULT_NAME_MAPPING: create_mapping_from_schema(schema).model_dump_json()}, + ) @pytest.mark.integration def test_add_already_exists(catalog: Catalog, table_schema_nested: Schema) -> None: table = _create_table_with_schema(catalog, table_schema_nested) - update = UpdateSchema(table) + update = table.update_schema() with pytest.raises(ValueError) as exc_info: update.add_column("foo", IntegerType()) @@ -90,7 +98,7 @@ def test_add_already_exists(catalog: Catalog, table_schema_nested: Schema) -> No @pytest.mark.integration def test_add_to_non_struct_type(catalog: Catalog, table_schema_simple: Schema) -> None: table = _create_table_with_schema(catalog, table_schema_simple) - update = UpdateSchema(table) + update = table.update_schema() with pytest.raises(ValueError) as exc_info: update.add_column(path=("foo", "lat"), field_type=IntegerType()) assert "Cannot add column 'lat' to non-struct type: foo" in str(exc_info.value) @@ -671,6 +679,13 @@ def test_rename_simple(simple_table: Table) -> None: identifier_field_ids=[2], ) + # Check that the name mapping gets updated + assert simple_table.name_mapping() == NameMapping([ + MappedField(field_id=1, names=['foo', 'vo']), + MappedField(field_id=2, names=['bar']), + MappedField(field_id=3, names=['baz']), + ]) + @pytest.mark.integration def test_rename_simple_nested(catalog: Catalog) -> None: @@ -698,6 +713,11 @@ def test_rename_simple_nested(catalog: Catalog) -> None: ), ) + # Check that the name mapping gets updated + assert tbl.name_mapping() == NameMapping([ + MappedField(field_id=1, names=['foo'], fields=[MappedField(field_id=2, names=['bar', 'vo'])]), + ]) + @pytest.mark.integration def test_rename_simple_nested_with_dots(catalog: Catalog) -> 
None: @@ -1046,13 +1066,13 @@ def test_add_nested_list_of_structs(catalog: Catalog) -> None: def test_add_required_column(catalog: Catalog) -> None: schema_ = Schema(NestedField(field_id=1, name="a", field_type=BooleanType(), required=False)) table = _create_table_with_schema(catalog, schema_) - update = UpdateSchema(table) + update = table.update_schema() with pytest.raises(ValueError) as exc_info: update.add_column(path="data", field_type=IntegerType(), required=True) assert "Incompatible change: cannot add required column: data" in str(exc_info.value) new_schema = ( - UpdateSchema(table, allow_incompatible_changes=True) # pylint: disable=W0212 + UpdateSchema(transaction=table.transaction(), allow_incompatible_changes=True) .add_column(path="data", field_type=IntegerType(), required=True) ._apply() ) @@ -1068,12 +1088,13 @@ def test_add_required_column_case_insensitive(catalog: Catalog) -> None: table = _create_table_with_schema(catalog, schema_) with pytest.raises(ValueError) as exc_info: - with UpdateSchema(table, allow_incompatible_changes=True) as update: - update.case_sensitive(False).add_column(path="ID", field_type=IntegerType(), required=True) + with table.transaction() as txn: + with txn.update_schema(allow_incompatible_changes=True) as update: + update.case_sensitive(False).add_column(path="ID", field_type=IntegerType(), required=True) assert "already exists: ID" in str(exc_info.value) new_schema = ( - UpdateSchema(table, allow_incompatible_changes=True) # pylint: disable=W0212 + UpdateSchema(transaction=table.transaction(), allow_incompatible_changes=True) .add_column(path="ID", field_type=IntegerType(), required=True) ._apply() ) @@ -1244,7 +1265,7 @@ def test_mixed_changes(catalog: Catalog) -> None: @pytest.mark.integration def test_ambiguous_column(catalog: Catalog, table_schema_nested: Schema) -> None: table = _create_table_with_schema(catalog, table_schema_nested) - update = UpdateSchema(table) + update = UpdateSchema(transaction=table.transaction()) with pytest.raises(ValueError) as exc_info: update.add_column(path="location.latitude", field_type=IntegerType()) @@ -2487,13 +2508,40 @@ def test_two_add_schemas_in_a_single_transaction(catalog: Catalog) -> None: ), ) - with pytest.raises(ValueError) as exc_info: + with pytest.raises(CommitFailedException) as exc_info: with tbl.transaction() as tr: with tr.update_schema() as update: update.add_column("bar", field_type=StringType()) with tr.update_schema() as update: update.add_column("baz", field_type=StringType()) - assert "Updates in a single commit need to be unique, duplicate: " in str( - exc_info.value + assert "CommitFailedException: Requirement failed: current schema changed: expected id 1 != 0" in str(exc_info.value) + + +@pytest.mark.integration +def test_create_table_integrity_after_fresh_assignment(catalog: Catalog) -> None: + schema = Schema( + NestedField(field_id=5, name="col_uuid", field_type=UUIDType(), required=False), + NestedField(field_id=4, name="col_fixed", field_type=FixedType(25), required=False), + ) + partition_spec = PartitionSpec( + PartitionField(source_id=5, field_id=1000, transform=IdentityTransform(), name="col_uuid"), spec_id=0 + ) + sort_order = SortOrder(SortField(source_id=4, transform=IdentityTransform())) + tbl_name = "default.test_create_integrity" + try: + catalog.drop_table(tbl_name) + except NoSuchTableError: + pass + tbl = catalog.create_table(identifier=tbl_name, schema=schema, partition_spec=partition_spec, sort_order=sort_order) + expected_schema = Schema( + 
NestedField(field_id=1, name="col_uuid", field_type=UUIDType(), required=False), + NestedField(field_id=2, name="col_fixed", field_type=FixedType(25), required=False), + ) + expected_spec = PartitionSpec( + PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="col_uuid"), spec_id=0 ) + expected_sort_order = SortOrder(SortField(source_id=2, transform=IdentityTransform())) + assert tbl.schema() == expected_schema + assert tbl.spec() == expected_spec + assert tbl.sort_order() == expected_sort_order diff --git a/tests/integration/test_writes.py b/tests/integration/test_writes.py index 17dc997163..f0d1c85797 100644 --- a/tests/integration/test_writes.py +++ b/tests/integration/test_writes.py @@ -15,16 +15,28 @@ # specific language governing permissions and limitations # under the License. # pylint:disable=redefined-outer-name +import os +import time import uuid from datetime import date, datetime +from pathlib import Path +from typing import Any, Dict, List +from urllib.parse import urlparse import pyarrow as pa +import pyarrow.parquet as pq import pytest +import pytz +from pyarrow.fs import S3FileSystem +from pydantic_core import ValidationError from pyspark.sql import SparkSession +from pytest_mock.plugin import MockerFixture -from pyiceberg.catalog import Catalog, load_catalog -from pyiceberg.exceptions import NamespaceAlreadyExistsError, NoSuchTableError +from pyiceberg.catalog import Catalog, Properties, Table +from pyiceberg.catalog.sql import SqlCatalog +from pyiceberg.exceptions import NoSuchTableError from pyiceberg.schema import Schema +from pyiceberg.table import _dataframe_to_data_files from pyiceberg.types import ( BinaryType, BooleanType, @@ -40,28 +52,6 @@ TimestamptzType, ) - -@pytest.fixture() -def catalog() -> Catalog: - catalog = load_catalog( - "local", - **{ - "type": "rest", - "uri": "http://localhost:8181", - "s3.endpoint": "http://localhost:9000", - "s3.access-key-id": "admin", - "s3.secret-access-key": "password", - }, - ) - - try: - catalog.create_namespace("default") - except NamespaceAlreadyExistsError: - pass - - return catalog - - TEST_DATA_WITH_NULL = { 'bool': [False, None, True], 'string': ['a', None, 'z'], @@ -105,23 +95,8 @@ def catalog() -> Catalog: @pytest.fixture(scope="session") -def session_catalog() -> Catalog: - return load_catalog( - "local", - **{ - "type": "rest", - "uri": "http://localhost:8181", - "s3.endpoint": "http://localhost:9000", - "s3.access-key-id": "admin", - "s3.secret-access-key": "password", - }, - ) - - -@pytest.fixture(scope="session") -def arrow_table_with_null() -> pa.Table: - """PyArrow table with all kinds of columns""" - pa_schema = pa.schema([ +def pa_schema() -> pa.Schema: + return pa.schema([ ("bool", pa.bool_()), ("string", pa.string()), ("string_long", pa.string()), @@ -136,88 +111,102 @@ def arrow_table_with_null() -> pa.Table: # ("time", pa.time64("us")), # Not natively supported by Arrow # ("uuid", pa.fixed(16)), - ("binary", pa.binary()), + ("binary", pa.large_binary()), ("fixed", pa.binary(16)), ]) + + +@pytest.fixture(scope="session") +def arrow_table_with_null(pa_schema: pa.Schema) -> pa.Table: + """PyArrow table with all kinds of columns""" return pa.Table.from_pydict(TEST_DATA_WITH_NULL, schema=pa_schema) -@pytest.fixture(scope="session", autouse=True) -def table_v1_with_null(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: - identifier = "default.arrow_table_v1_with_null" +@pytest.fixture(scope="session") +def arrow_table_without_data(pa_schema: pa.Schema) -> pa.Table: 
+ """PyArrow table with all kinds of columns""" + return pa.Table.from_pylist([], schema=pa_schema) + +@pytest.fixture(scope="session") +def arrow_table_with_only_nulls(pa_schema: pa.Schema) -> pa.Table: + """PyArrow table with all kinds of columns""" + return pa.Table.from_pylist([{}, {}], schema=pa_schema) + + +def _create_table(session_catalog: Catalog, identifier: str, properties: Properties, data: List[pa.Table]) -> Table: try: session_catalog.drop_table(identifier=identifier) except NoSuchTableError: pass - tbl = session_catalog.create_table(identifier=identifier, schema=TABLE_SCHEMA, properties={'format-version': '1'}) - tbl.append(arrow_table_with_null) + tbl = session_catalog.create_table(identifier=identifier, schema=TABLE_SCHEMA, properties=properties) + for d in data: + tbl.append(d) + return tbl + + +@pytest.fixture(scope="session", autouse=True) +def table_v1_with_null(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: + identifier = "default.arrow_table_v1_with_null" + tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, [arrow_table_with_null]) assert tbl.format_version == 1, f"Expected v1, got: v{tbl.format_version}" @pytest.fixture(scope="session", autouse=True) -def table_v1_appended_with_null(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: - identifier = "default.arrow_table_v1_appended_with_null" +def table_v1_without_data(session_catalog: Catalog, arrow_table_without_data: pa.Table) -> None: + identifier = "default.arrow_table_v1_without_data" + tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, [arrow_table_without_data]) + assert tbl.format_version == 1, f"Expected v1, got: v{tbl.format_version}" - try: - session_catalog.drop_table(identifier=identifier) - except NoSuchTableError: - pass - tbl = session_catalog.create_table(identifier=identifier, schema=TABLE_SCHEMA, properties={'format-version': '1'}) +@pytest.fixture(scope="session", autouse=True) +def table_v1_with_only_nulls(session_catalog: Catalog, arrow_table_with_only_nulls: pa.Table) -> None: + identifier = "default.arrow_table_v1_with_only_nulls" + tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, [arrow_table_with_only_nulls]) + assert tbl.format_version == 1, f"Expected v1, got: v{tbl.format_version}" - for _ in range(2): - tbl.append(arrow_table_with_null) +@pytest.fixture(scope="session", autouse=True) +def table_v1_appended_with_null(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: + identifier = "default.arrow_table_v1_appended_with_null" + tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, 2 * [arrow_table_with_null]) assert tbl.format_version == 1, f"Expected v1, got: v{tbl.format_version}" @pytest.fixture(scope="session", autouse=True) def table_v2_with_null(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: identifier = "default.arrow_table_v2_with_null" + tbl = _create_table(session_catalog, identifier, {"format-version": "2"}, [arrow_table_with_null]) + assert tbl.format_version == 2, f"Expected v2, got: v{tbl.format_version}" - try: - session_catalog.drop_table(identifier=identifier) - except NoSuchTableError: - pass - tbl = session_catalog.create_table(identifier=identifier, schema=TABLE_SCHEMA, properties={'format-version': '2'}) - tbl.append(arrow_table_with_null) +@pytest.fixture(scope="session", autouse=True) +def table_v2_without_data(session_catalog: Catalog, arrow_table_without_data: pa.Table) -> None: + identifier = 
"default.arrow_table_v2_without_data" + tbl = _create_table(session_catalog, identifier, {"format-version": "2"}, [arrow_table_without_data]) + assert tbl.format_version == 2, f"Expected v2, got: v{tbl.format_version}" + +@pytest.fixture(scope="session", autouse=True) +def table_v2_with_only_nulls(session_catalog: Catalog, arrow_table_with_only_nulls: pa.Table) -> None: + identifier = "default.arrow_table_v2_with_only_nulls" + tbl = _create_table(session_catalog, identifier, {"format-version": "2"}, [arrow_table_with_only_nulls]) assert tbl.format_version == 2, f"Expected v2, got: v{tbl.format_version}" @pytest.fixture(scope="session", autouse=True) def table_v2_appended_with_null(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: identifier = "default.arrow_table_v2_appended_with_null" - - try: - session_catalog.drop_table(identifier=identifier) - except NoSuchTableError: - pass - - tbl = session_catalog.create_table(identifier=identifier, schema=TABLE_SCHEMA, properties={'format-version': '2'}) - - for _ in range(2): - tbl.append(arrow_table_with_null) - + tbl = _create_table(session_catalog, identifier, {"format-version": "2"}, 2 * [arrow_table_with_null]) assert tbl.format_version == 2, f"Expected v2, got: v{tbl.format_version}" @pytest.fixture(scope="session", autouse=True) def table_v1_v2_appended_with_null(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: identifier = "default.arrow_table_v1_v2_appended_with_null" - - try: - session_catalog.drop_table(identifier=identifier) - except NoSuchTableError: - pass - - tbl = session_catalog.create_table(identifier=identifier, schema=TABLE_SCHEMA, properties={'format-version': '1'}) - tbl.append(arrow_table_with_null) - + tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, [arrow_table_with_null]) assert tbl.format_version == 1, f"Expected v1, got: v{tbl.format_version}" with tbl.transaction() as tx: @@ -228,40 +217,6 @@ def table_v1_v2_appended_with_null(session_catalog: Catalog, arrow_table_with_nu assert tbl.format_version == 2, f"Expected v2, got: v{tbl.format_version}" -@pytest.fixture(scope="session") -def spark() -> SparkSession: - import importlib.metadata - import os - - spark_version = ".".join(importlib.metadata.version("pyspark").split(".")[:2]) - scala_version = "2.12" - iceberg_version = "1.4.3" - - os.environ["PYSPARK_SUBMIT_ARGS"] = ( - f"--packages org.apache.iceberg:iceberg-spark-runtime-{spark_version}_{scala_version}:{iceberg_version}," - f"org.apache.iceberg:iceberg-aws-bundle:{iceberg_version} pyspark-shell" - ) - os.environ["AWS_REGION"] = "us-east-1" - os.environ["AWS_ACCESS_KEY_ID"] = "admin" - os.environ["AWS_SECRET_ACCESS_KEY"] = "password" - - spark = ( - SparkSession.builder.appName("PyIceberg integration test") - .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") - .config("spark.sql.catalog.integration", "org.apache.iceberg.spark.SparkCatalog") - .config("spark.sql.catalog.integration.catalog-impl", "org.apache.iceberg.rest.RESTCatalog") - .config("spark.sql.catalog.integration.uri", "http://localhost:8181") - .config("spark.sql.catalog.integration.io-impl", "org.apache.iceberg.aws.s3.S3FileIO") - .config("spark.sql.catalog.integration.warehouse", "s3://warehouse/wh/") - .config("spark.sql.catalog.integration.s3.endpoint", "http://localhost:9000") - .config("spark.sql.catalog.integration.s3.path-style-access", "true") - .config("spark.sql.defaultCatalog", "integration") - .getOrCreate() - ) - - return 
spark - - @pytest.mark.integration @pytest.mark.parametrize("format_version", [1, 2]) def test_query_count(spark: SparkSession, format_version: int) -> None: @@ -279,6 +234,26 @@ def test_query_filter_null(spark: SparkSession, col: str, format_version: int) - assert df.where(f"{col} is not null").count() == 2, f"Expected 2 rows for {col}" +@pytest.mark.integration +@pytest.mark.parametrize("col", TEST_DATA_WITH_NULL.keys()) +@pytest.mark.parametrize("format_version", [1, 2]) +def test_query_filter_without_data(spark: SparkSession, col: str, format_version: int) -> None: + identifier = f"default.arrow_table_v{format_version}_without_data" + df = spark.table(identifier) + assert df.where(f"{col} is null").count() == 0, f"Expected 0 row for {col}" + assert df.where(f"{col} is not null").count() == 0, f"Expected 0 rows for {col}" + + +@pytest.mark.integration +@pytest.mark.parametrize("col", TEST_DATA_WITH_NULL.keys()) +@pytest.mark.parametrize("format_version", [1, 2]) +def test_query_filter_only_nulls(spark: SparkSession, col: str, format_version: int) -> None: + identifier = f"default.arrow_table_v{format_version}_with_only_nulls" + df = spark.table(identifier) + assert df.where(f"{col} is null").count() == 2, f"Expected 2 row for {col}" + assert df.where(f"{col} is not null").count() == 0, f"Expected 0 rows for {col}" + + @pytest.mark.integration @pytest.mark.parametrize("col", TEST_DATA_WITH_NULL.keys()) @pytest.mark.parametrize("format_version", [1, 2]) @@ -301,15 +276,7 @@ def test_query_filter_v1_v2_append_null(spark: SparkSession, col: str) -> None: @pytest.mark.integration def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: identifier = "default.arrow_table_summaries" - - try: - session_catalog.drop_table(identifier=identifier) - except NoSuchTableError: - pass - tbl = session_catalog.create_table(identifier=identifier, schema=TABLE_SCHEMA, properties={'format-version': '1'}) - - tbl.append(arrow_table_with_null) - tbl.append(arrow_table_with_null) + tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, 2 * [arrow_table_with_null]) tbl.overwrite(arrow_table_with_null) rows = spark.sql( @@ -327,39 +294,39 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi assert summaries[0] == { 'added-data-files': '1', - 'added-files-size': '5283', + 'added-files-size': '5459', 'added-records': '3', 'total-data-files': '1', 'total-delete-files': '0', 'total-equality-deletes': '0', - 'total-files-size': '5283', + 'total-files-size': '5459', 'total-position-deletes': '0', 'total-records': '3', } assert summaries[1] == { 'added-data-files': '1', - 'added-files-size': '5283', + 'added-files-size': '5459', 'added-records': '3', 'total-data-files': '2', 'total-delete-files': '0', 'total-equality-deletes': '0', - 'total-files-size': '10566', + 'total-files-size': '10918', 'total-position-deletes': '0', 'total-records': '6', } assert summaries[2] == { 'added-data-files': '1', - 'added-files-size': '5283', + 'added-files-size': '5459', 'added-records': '3', 'deleted-data-files': '2', 'deleted-records': '6', - 'removed-files-size': '10566', + 'removed-files-size': '10918', 'total-data-files': '1', 'total-delete-files': '0', 'total-equality-deletes': '0', - 'total-files-size': '5283', + 'total-files-size': '5459', 'total-position-deletes': '0', 'total-records': '3', } @@ -368,12 +335,7 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi @pytest.mark.integration def 
test_data_files(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: identifier = "default.arrow_data_files" - - try: - session_catalog.drop_table(identifier=identifier) - except NoSuchTableError: - pass - tbl = session_catalog.create_table(identifier=identifier, schema=TABLE_SCHEMA, properties={'format-version': '1'}) + tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, []) tbl.overwrite(arrow_table_with_null) # should produce a DELETE entry @@ -393,19 +355,289 @@ def test_data_files(spark: SparkSession, session_catalog: Catalog, arrow_table_w assert [row.deleted_data_files_count for row in rows] == [0, 0, 1, 0, 0] +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +@pytest.mark.parametrize( + "properties, expected_compression_name", + [ + # REST catalog uses Zstandard by default: https://github.com/apache/iceberg/pull/8593 + ({}, "ZSTD"), + ({"write.parquet.compression-codec": "uncompressed"}, "UNCOMPRESSED"), + ({"write.parquet.compression-codec": "gzip", "write.parquet.compression-level": "1"}, "GZIP"), + ({"write.parquet.compression-codec": "zstd", "write.parquet.compression-level": "1"}, "ZSTD"), + ({"write.parquet.compression-codec": "snappy"}, "SNAPPY"), + ], +) +def test_write_parquet_compression_properties( + spark: SparkSession, + session_catalog: Catalog, + arrow_table_with_null: pa.Table, + format_version: int, + properties: Dict[str, Any], + expected_compression_name: str, +) -> None: + identifier = "default.write_parquet_compression_properties" + + tbl = _create_table(session_catalog, identifier, {"format-version": format_version, **properties}, [arrow_table_with_null]) + + data_file_paths = [task.file.file_path for task in tbl.scan().plan_files()] + + fs = S3FileSystem( + endpoint_override=session_catalog.properties["s3.endpoint"], + access_key=session_catalog.properties["s3.access-key-id"], + secret_key=session_catalog.properties["s3.secret-access-key"], + ) + uri = urlparse(data_file_paths[0]) + with fs.open_input_file(f"{uri.netloc}{uri.path}") as f: + parquet_metadata = pq.read_metadata(f) + compression = parquet_metadata.row_group(0).column(0).compression + + assert compression == expected_compression_name + + +@pytest.mark.integration +@pytest.mark.parametrize( + "properties, expected_kwargs", + [ + ({"write.parquet.page-size-bytes": "42"}, {"data_page_size": 42}), + ({"write.parquet.dict-size-bytes": "42"}, {"dictionary_pagesize_limit": 42}), + ], +) +def test_write_parquet_other_properties( + mocker: MockerFixture, + spark: SparkSession, + session_catalog: Catalog, + arrow_table_with_null: pa.Table, + properties: Dict[str, Any], + expected_kwargs: Dict[str, Any], +) -> None: + print(type(mocker)) + identifier = "default.test_write_parquet_other_properties" + + # The properties we test cannot be checked on the resulting Parquet file, so we spy on the ParquetWriter call instead + ParquetWriter = mocker.spy(pq, "ParquetWriter") + _create_table(session_catalog, identifier, properties, [arrow_table_with_null]) + + call_kwargs = ParquetWriter.call_args[1] + for key, value in expected_kwargs.items(): + assert call_kwargs.get(key) == value + + +@pytest.mark.integration +@pytest.mark.parametrize( + "properties", + [ + {"write.parquet.row-group-size-bytes": "42"}, + {"write.parquet.page-row-limit": "42"}, + {"write.parquet.bloom-filter-enabled.column.bool": "42"}, + {"write.parquet.bloom-filter-max-bytes": "42"}, + ], +) +def test_write_parquet_unsupported_properties( + spark: SparkSession, 
+ session_catalog: Catalog, + arrow_table_with_null: pa.Table, + properties: Dict[str, str], +) -> None: + identifier = "default.write_parquet_unsupported_properties" + + tbl = _create_table(session_catalog, identifier, properties, []) + with pytest.raises(NotImplementedError): + tbl.append(arrow_table_with_null) + + @pytest.mark.integration def test_invalid_arguments(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: identifier = "default.arrow_data_files" + tbl = _create_table(session_catalog, identifier, {'format-version': '1'}, []) + + with pytest.raises(ValueError, match="Expected PyArrow table, got: not a df"): + tbl.overwrite("not a df") + + with pytest.raises(ValueError, match="Expected PyArrow table, got: not a df"): + tbl.append("not a df") + + +@pytest.mark.integration +def test_summaries_with_only_nulls( + spark: SparkSession, session_catalog: Catalog, arrow_table_without_data: pa.Table, arrow_table_with_only_nulls: pa.Table +) -> None: + identifier = "default.arrow_table_summaries_with_only_nulls" + tbl = _create_table( + session_catalog, identifier, {'format-version': '1'}, [arrow_table_without_data, arrow_table_with_only_nulls] + ) + tbl.overwrite(arrow_table_without_data) + + rows = spark.sql( + f""" + SELECT operation, summary + FROM {identifier}.snapshots + ORDER BY committed_at ASC + """ + ).collect() + + operations = [row.operation for row in rows] + assert operations == ['append', 'append', 'overwrite'] + + summaries = [row.summary for row in rows] + + assert summaries[0] == { + 'total-data-files': '0', + 'total-delete-files': '0', + 'total-equality-deletes': '0', + 'total-files-size': '0', + 'total-position-deletes': '0', + 'total-records': '0', + } + + assert summaries[1] == { + 'added-data-files': '1', + 'added-files-size': '4239', + 'added-records': '2', + 'total-data-files': '1', + 'total-delete-files': '0', + 'total-equality-deletes': '0', + 'total-files-size': '4239', + 'total-position-deletes': '0', + 'total-records': '2', + } + + assert summaries[0] == { + 'total-data-files': '0', + 'total-delete-files': '0', + 'total-equality-deletes': '0', + 'total-files-size': '0', + 'total-position-deletes': '0', + 'total-records': '0', + } + + +@pytest.mark.integration +def test_duckdb_url_import(warehouse: Path, arrow_table_with_null: pa.Table) -> None: + os.environ['TZ'] = 'Etc/UTC' + time.tzset() + tz = pytz.timezone(os.environ['TZ']) + + catalog = SqlCatalog("test_sql_catalog", uri="sqlite:///:memory:", warehouse=f"/{warehouse}") + catalog.create_namespace("default") + + identifier = "default.arrow_table_v1_with_null" + tbl = _create_table(catalog, identifier, {}, [arrow_table_with_null]) + location = tbl.metadata_location + + import duckdb + + duckdb.sql('INSTALL iceberg; LOAD iceberg;') + result = duckdb.sql( + f""" + SELECT * + FROM iceberg_scan('{location}') + """ + ).fetchall() + + assert result == [ + ( + False, + 'a', + 'aaaaaaaaaaaaaaaaaaaaaa', + 1, + 1, + 0.0, + 0.0, + datetime(2023, 1, 1, 19, 25), + datetime(2023, 1, 1, 19, 25, tzinfo=tz), + date(2023, 1, 1), + b'\x01', + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + ), + (None, None, None, None, None, None, None, None, None, None, None, None), + ( + True, + 'z', + 'zzzzzzzzzzzzzzzzzzzzzz', + 9, + 9, + 0.8999999761581421, + 0.9, + datetime(2023, 3, 1, 19, 25), + datetime(2023, 3, 1, 19, 25, tzinfo=tz), + date(2023, 3, 1), + b'\x12', + b'\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11', + ), + ] + + +@pytest.mark.integration 
+@pytest.mark.parametrize("format_version", [1, 2]) +def test_write_and_evolve(session_catalog: Catalog, format_version: int) -> None: + identifier = f"default.arrow_write_data_and_evolve_schema_v{format_version}" try: session_catalog.drop_table(identifier=identifier) except NoSuchTableError: pass - tbl = session_catalog.create_table(identifier=identifier, schema=TABLE_SCHEMA, properties={'format-version': '1'}) + pa_table = pa.Table.from_pydict( + { + 'foo': ['a', None, 'z'], + }, + schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]), + ) - with pytest.raises(ValueError, match="Expected PyArrow table, got: not a df"): - tbl.overwrite("not a df") + tbl = session_catalog.create_table( + identifier=identifier, schema=pa_table.schema, properties={"format-version": str(format_version)} + ) - with pytest.raises(ValueError, match="Expected PyArrow table, got: not a df"): - tbl.append("not a df") + pa_table_with_column = pa.Table.from_pydict( + { + 'foo': ['a', None, 'z'], + 'bar': [19, None, 25], + }, + schema=pa.schema([ + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=True), + ]), + ) + + with tbl.transaction() as txn: + with txn.update_schema() as schema_txn: + schema_txn.union_by_name(pa_table_with_column.schema) + + with txn.update_snapshot().fast_append() as snapshot_update: + for data_file in _dataframe_to_data_files(table_metadata=txn.table_metadata, df=pa_table_with_column, io=tbl.io): + snapshot_update.append_data_file(data_file) + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_table_properties_int_value( + session_catalog: Catalog, + arrow_table_with_null: pa.Table, + format_version: int, +) -> None: + # table properties can be set to int, but still serialized to string + property_with_int = {"property_name": 42} + identifier = "default.test_table_properties_int_value" + + tbl = _create_table( + session_catalog, identifier, {"format-version": format_version, **property_with_int}, [arrow_table_with_null] + ) + assert isinstance(tbl.properties["property_name"], str) + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_table_properties_raise_for_none_value( + session_catalog: Catalog, + arrow_table_with_null: pa.Table, + format_version: int, +) -> None: + property_with_none = {"property_name": None} + identifier = "default.test_table_properties_raise_for_none_value" + + with pytest.raises(ValidationError) as exc_info: + _ = _create_table( + session_catalog, identifier, {"format-version": format_version, **property_with_none}, [arrow_table_with_null] + ) + assert "None type is not a supported value in properties: property_name" in str(exc_info.value) diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py index 0628ed4893..a3dd56db7f 100644 --- a/tests/io/test_pyarrow.py +++ b/tests/io/test_pyarrow.py @@ -18,6 +18,7 @@ import os import tempfile +from datetime import date from typing import Any, List, Optional from unittest.mock import MagicMock, patch from uuid import uuid4 @@ -59,7 +60,9 @@ ICEBERG_SCHEMA, PyArrowFile, PyArrowFileIO, + StatsAggregator, _ConvertToArrowSchema, + _primitive_to_physical, _read_deletes, expression_to_pyarrow, project_table, @@ -84,6 +87,7 @@ LongType, MapType, NestedField, + PrimitiveType, StringType, StructType, TimestampType, @@ -463,7 +467,7 @@ def test_string_type_to_pyarrow() -> None: def test_binary_type_to_pyarrow() -> None: iceberg_type = BinaryType() - assert visit(iceberg_type, _ConvertToArrowSchema()) == 
pa.binary() + assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.large_binary() def test_struct_type_to_pyarrow(table_schema_simple: Schema) -> None: @@ -1666,3 +1670,43 @@ def check_results(location: str, expected_schema: str, expected_netloc: str, exp def test_make_compatible_name() -> None: assert make_compatible_name("label/abc") == "label_x2Fabc" assert make_compatible_name("label?abc") == "label_x3Fabc" + + +@pytest.mark.parametrize( + "vals, primitive_type, expected_result", + [ + ([None, 2, 1], IntegerType(), 1), + ([1, None, 2], IntegerType(), 1), + ([None, None, None], IntegerType(), None), + ([None, date(2024, 2, 4), date(2024, 1, 2)], DateType(), date(2024, 1, 2)), + ([date(2024, 1, 2), None, date(2024, 2, 4)], DateType(), date(2024, 1, 2)), + ([None, None, None], DateType(), None), + ], +) +def test_stats_aggregator_update_min(vals: List[Any], primitive_type: PrimitiveType, expected_result: Any) -> None: + stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type)) + + for val in vals: + stats.update_min(val) + + assert stats.current_min == expected_result + + +@pytest.mark.parametrize( + "vals, primitive_type, expected_result", + [ + ([None, 2, 1], IntegerType(), 2), + ([1, None, 2], IntegerType(), 2), + ([None, None, None], IntegerType(), None), + ([None, date(2024, 2, 4), date(2024, 1, 2)], DateType(), date(2024, 2, 4)), + ([date(2024, 1, 2), None, date(2024, 2, 4)], DateType(), date(2024, 2, 4)), + ([None, None, None], DateType(), None), + ], +) +def test_stats_aggregator_update_max(vals: List[Any], primitive_type: PrimitiveType, expected_result: Any) -> None: + stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type)) + + for val in vals: + stats.update_max(val) + + assert stats.current_max == expected_result diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py index c7f364b920..5b55bd61b6 100644 --- a/tests/io/test_pyarrow_visitor.py +++ b/tests/io/test_pyarrow_visitor.py @@ -84,6 +84,18 @@ def test_pyarrow_boolean_to_iceberg() -> None: assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type +def test_pyarrow_int8_to_iceberg() -> None: + pyarrow_type = pa.int8() + converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg()) + assert converted_iceberg_type == IntegerType() + + +def test_pyarrow_int16_to_iceberg() -> None: + pyarrow_type = pa.int16() + converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg()) + assert converted_iceberg_type == IntegerType() + + def test_pyarrow_int32_to_iceberg() -> None: pyarrow_type = pa.int32() converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg()) @@ -203,7 +215,7 @@ def test_pyarrow_string_to_iceberg() -> None: def test_pyarrow_variable_binary_to_iceberg() -> None: - pyarrow_type = pa.binary() + pyarrow_type = pa.large_binary() converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg()) assert converted_iceberg_type == BinaryType() assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type @@ -233,6 +245,26 @@ def test_pyarrow_list_to_iceberg() -> None: assert visit_pyarrow(pyarrow_list, _ConvertToIceberg()) == expected +def test_pyarrow_large_list_to_iceberg() -> None: + pyarrow_list = pa.large_list(pa.field("element", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "1"})) + expected = ListType( + element_id=1, + element_type=IntegerType(), + element_required=True, + ) + assert visit_pyarrow(pyarrow_list, _ConvertToIceberg()) == expected + + +def 
test_pyarrow_fixed_size_list_to_iceberg() -> None: + pyarrow_list = pa.list_(pa.field("element", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "1"}), 1) + expected = ListType( + element_id=1, + element_type=IntegerType(), + element_required=True, + ) + assert visit_pyarrow(pyarrow_list, _ConvertToIceberg()) == expected + + def test_pyarrow_map_to_iceberg() -> None: pyarrow_map = pa.map_( pa.field("key", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "1"}), @@ -272,6 +304,15 @@ def test_round_schema_conversion_nested(table_schema_nested: Schema) -> None: assert actual == expected +def test_round_schema_large_string() -> None: + schema = pa.schema([pa.field("animals", pa.large_string())]) + actual = str(pyarrow_to_schema(schema, name_mapping=NameMapping([MappedField(field_id=1, names=["animals"])]))) + expected = """table { + 1: animals: optional string +}""" + assert actual == expected + + def test_simple_schema_has_missing_ids() -> None: schema = pa.schema([ pa.field('foo', pa.string(), nullable=False), diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 3e620b48fb..b8097f5fcf 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -17,9 +17,11 @@ # pylint:disable=redefined-outer-name import uuid from copy import copy -from typing import Dict +from typing import Any, Dict +import pyarrow as pa import pytest +from pydantic import ValidationError from sortedcontainers import SortedList from pyiceberg.catalog.noop import NoopCatalog @@ -42,6 +44,7 @@ from pyiceberg.schema import Schema from pyiceberg.table import ( AddSnapshotUpdate, + AddSortOrderUpdate, AssertCreate, AssertCurrentSchemaId, AssertDefaultSortOrderId, @@ -51,6 +54,7 @@ AssertRefSnapshotId, AssertTableUUID, RemovePropertiesUpdate, + SetDefaultSortOrderUpdate, SetPropertiesUpdate, SetSnapshotRefUpdate, SnapshotRef, @@ -58,12 +62,12 @@ Table, UpdateSchema, _apply_table_update, - _generate_snapshot_id, + _check_schema, _match_deletes_to_data_file, _TableMetadataUpdateContext, update_table_metadata, ) -from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER, TableMetadataUtil, TableMetadataV2 +from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER, TableMetadataUtil, TableMetadataV2, _generate_snapshot_id from pyiceberg.table.snapshots import ( Operation, Snapshot, @@ -434,7 +438,7 @@ def test_serialize_set_properties_updates() -> None: def test_add_column(table_v2: Table) -> None: - update = UpdateSchema(table_v2) + update = UpdateSchema(transaction=table_v2.transaction()) update.add_column(path="b", field_type=IntegerType()) apply_schema: Schema = update._apply() # pylint: disable=W0212 assert len(apply_schema.fields) == 4 @@ -468,7 +472,7 @@ def test_add_primitive_type_column(table_v2: Table) -> None: for name, type_ in primitive_type.items(): field_name = f"new_column_{name}" - update = UpdateSchema(table_v2) + update = UpdateSchema(transaction=table_v2.transaction()) update.add_column(path=field_name, field_type=type_, doc=f"new_column_{name}") new_schema = update._apply() # pylint: disable=W0212 @@ -480,7 +484,7 @@ def test_add_primitive_type_column(table_v2: Table) -> None: def test_add_nested_type_column(table_v2: Table) -> None: # add struct type column field_name = "new_column_struct" - update = UpdateSchema(table_v2) + update = UpdateSchema(transaction=table_v2.transaction()) struct_ = StructType( NestedField(1, "lat", DoubleType()), NestedField(2, "long", DoubleType()), @@ -498,7 +502,7 @@ def test_add_nested_type_column(table_v2: Table) -> None: def 
test_add_nested_map_type_column(table_v2: Table) -> None: # add map type column field_name = "new_column_map" - update = UpdateSchema(table_v2) + update = UpdateSchema(transaction=table_v2.transaction()) map_ = MapType(1, StringType(), 2, IntegerType(), False) update.add_column(path=field_name, field_type=map_) new_schema = update._apply() # pylint: disable=W0212 @@ -510,7 +514,7 @@ def test_add_nested_map_type_column(table_v2: Table) -> None: def test_add_nested_list_type_column(table_v2: Table) -> None: # add list type column field_name = "new_column_list" - update = UpdateSchema(table_v2) + update = UpdateSchema(transaction=table_v2.transaction()) list_ = ListType( element_id=101, element_type=StructType( @@ -665,6 +669,26 @@ def test_update_metadata_set_snapshot_ref(table_v2: Table) -> None: ) +def test_update_metadata_add_update_sort_order(table_v2: Table) -> None: + new_sort_order = SortOrder(order_id=table_v2.sort_order().order_id + 1) + new_metadata = update_table_metadata( + table_v2.metadata, + (AddSortOrderUpdate(sort_order=new_sort_order), SetDefaultSortOrderUpdate(sort_order_id=-1)), + ) + assert len(new_metadata.sort_orders) == 2 + assert new_metadata.sort_orders[-1] == new_sort_order + assert new_metadata.default_sort_order_id == new_sort_order.order_id + + +def test_update_metadata_update_sort_order_invalid(table_v2: Table) -> None: + with pytest.raises(ValueError, match="Cannot set current sort order to the last added one when no sort order has been added"): + update_table_metadata(table_v2.metadata, (SetDefaultSortOrderUpdate(sort_order_id=-1),)) + + invalid_order_id = 10 + with pytest.raises(ValueError, match=f"Sort order with id {invalid_order_id} does not exist"): + update_table_metadata(table_v2.metadata, (SetDefaultSortOrderUpdate(sort_order_id=invalid_order_id),)) + + def test_update_metadata_with_multiple_updates(table_v1: Table) -> None: base_metadata = table_v1.metadata transaction = table_v1.transaction() @@ -785,7 +809,7 @@ def test_metadata_isolation_from_illegal_updates(table_v1: Table) -> None: def test_generate_snapshot_id(table_v2: Table) -> None: assert isinstance(_generate_snapshot_id(), int) - assert isinstance(table_v2.new_snapshot_id(), int) + assert isinstance(table_v2.metadata.new_snapshot_id(), int) def test_assert_create(table_v2: Table) -> None: @@ -987,3 +1011,105 @@ def test_correct_schema() -> None: _ = t.scan(snapshot_id=-1).projection() assert "Snapshot not found: -1" in str(exc_info.value) + + +def test_schema_mismatch_type(table_schema_simple: Schema) -> None: + other_schema = pa.schema(( + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.decimal128(18, 6), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + )) + + expected = r"""Mismatch in fields: +┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ ┃ Table field ┃ Dataframe field ┃ +┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ ✅ │ 1: foo: optional string │ 1: foo: optional string │ +│ ❌ │ 2: bar: required int │ 2: bar: required decimal\(18, 6\) │ +│ ✅ │ 3: baz: optional boolean │ 3: baz: optional boolean │ +└────┴──────────────────────────┴─────────────────────────────────┘ +""" + + with pytest.raises(ValueError, match=expected): + _check_schema(table_schema_simple, other_schema) + + +def test_schema_mismatch_nullability(table_schema_simple: Schema) -> None: + other_schema = pa.schema(( + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=True), + pa.field("baz", pa.bool_(), 
nullable=True), + )) + + expected = """Mismatch in fields: +┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ ┃ Table field ┃ Dataframe field ┃ +┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ ✅ │ 1: foo: optional string │ 1: foo: optional string │ +│ ❌ │ 2: bar: required int │ 2: bar: optional int │ +│ ✅ │ 3: baz: optional boolean │ 3: baz: optional boolean │ +└────┴──────────────────────────┴──────────────────────────┘ +""" + + with pytest.raises(ValueError, match=expected): + _check_schema(table_schema_simple, other_schema) + + +def test_schema_mismatch_missing_field(table_schema_simple: Schema) -> None: + other_schema = pa.schema(( + pa.field("foo", pa.string(), nullable=True), + pa.field("baz", pa.bool_(), nullable=True), + )) + + expected = """Mismatch in fields: +┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ ┃ Table field ┃ Dataframe field ┃ +┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ ✅ │ 1: foo: optional string │ 1: foo: optional string │ +│ ❌ │ 2: bar: required int │ Missing │ +│ ✅ │ 3: baz: optional boolean │ 3: baz: optional boolean │ +└────┴──────────────────────────┴──────────────────────────┘ +""" + + with pytest.raises(ValueError, match=expected): + _check_schema(table_schema_simple, other_schema) + + +def test_schema_mismatch_additional_field(table_schema_simple: Schema) -> None: + other_schema = pa.schema(( + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=True), + pa.field("baz", pa.bool_(), nullable=True), + pa.field("new_field", pa.date32(), nullable=True), + )) + + expected = r"PyArrow table contains more columns: new_field. Update the schema first \(hint, use union_by_name\)." + + with pytest.raises(ValueError, match=expected): + _check_schema(table_schema_simple, other_schema) + + +def test_table_properties(example_table_metadata_v2: Dict[str, Any]) -> None: + # metadata properties are all strings + for k, v in example_table_metadata_v2["properties"].items(): + assert isinstance(k, str) + assert isinstance(v, str) + metadata = TableMetadataV2(**example_table_metadata_v2) + for k, v in metadata.properties.items(): + assert isinstance(k, str) + assert isinstance(v, str) + + # property can be set to int, but still serialized as string + property_with_int = {"property_name": 42} + new_example_table_metadata_v2 = {**example_table_metadata_v2, "properties": property_with_int} + assert isinstance(new_example_table_metadata_v2["properties"]["property_name"], int) + new_metadata = TableMetadataV2(**new_example_table_metadata_v2) + assert isinstance(new_metadata.properties["property_name"], str) + + +def test_table_properties_raise_for_none_value(example_table_metadata_v2: Dict[str, Any]) -> None: + property_with_none = {"property_name": None} + example_table_metadata_v2 = {**example_table_metadata_v2, "properties": property_with_none} + with pytest.raises(ValidationError) as exc_info: + TableMetadataV2(**example_table_metadata_v2) + assert "None type is not a supported value in properties: property_name" in str(exc_info.value) diff --git a/tests/table/test_metadata.py b/tests/table/test_metadata.py index 816a4fbe44..c05700ecbb 100644 --- a/tests/table/test_metadata.py +++ b/tests/table/test_metadata.py @@ -198,6 +198,75 @@ def test_migrate_v1_partition_specs(example_table_metadata_v1: Dict[str, Any]) - ] +def test_new_table_metadata_with_explicit_v1_format() -> None: + schema = Schema( + NestedField(field_id=10, name="foo", field_type=StringType(), required=False), + 
NestedField(field_id=22, name="bar", field_type=IntegerType(), required=True), + NestedField(field_id=33, name="baz", field_type=BooleanType(), required=False), + schema_id=10, + identifier_field_ids=[22], + ) + + partition_spec = PartitionSpec( + PartitionField(source_id=22, field_id=1022, transform=IdentityTransform(), name="bar"), spec_id=10 + ) + + sort_order = SortOrder( + SortField(source_id=10, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_LAST), + order_id=10, + ) + + actual = new_table_metadata( + schema=schema, + partition_spec=partition_spec, + sort_order=sort_order, + location="s3://some_v1_location/", + properties={'format-version': "1"}, + ) + + expected_schema = Schema( + NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), + schema_id=0, + identifier_field_ids=[2], + ) + + expected_spec = PartitionSpec(PartitionField(source_id=2, field_id=1000, transform=IdentityTransform(), name="bar")) + + expected = TableMetadataV1( + location="s3://some_v1_location/", + table_uuid=actual.table_uuid, + last_updated_ms=actual.last_updated_ms, + last_column_id=3, + schemas=[expected_schema], + schema_=expected_schema, + current_schema_id=0, + partition_spec=[field.model_dump() for field in expected_spec.fields], + partition_specs=[expected_spec], + default_spec_id=0, + last_partition_id=1000, + properties={}, + current_snapshot_id=None, + snapshots=[], + snapshot_log=[], + metadata_log=[], + sort_orders=[ + SortOrder( + SortField( + source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_LAST + ), + order_id=1, + ) + ], + default_sort_order_id=1, + refs={}, + format_version=1, + ) + + assert actual.model_dump() == expected.model_dump() + + def test_invalid_format_version(example_table_metadata_v1: Dict[str, Any]) -> None: """Test the exception when trying to load an unknown version""" diff --git a/tests/table/test_name_mapping.py b/tests/table/test_name_mapping.py index d74aa3234c..e039415ce3 100644 --- a/tests/table/test_name_mapping.py +++ b/tests/table/test_name_mapping.py @@ -17,7 +17,14 @@ import pytest from pyiceberg.schema import Schema -from pyiceberg.table.name_mapping import MappedField, NameMapping, create_mapping_from_schema, parse_mapping_from_json +from pyiceberg.table.name_mapping import ( + MappedField, + NameMapping, + create_mapping_from_schema, + parse_mapping_from_json, + update_mapping, +) +from pyiceberg.types import NestedField, StringType @pytest.fixture(scope="session") @@ -238,3 +245,67 @@ def test_mapping_lookup_by_name(table_name_mapping_nested: NameMapping) -> None: with pytest.raises(ValueError, match="Could not find field with name: boom"): table_name_mapping_nested.find("boom") + + +def test_invalid_mapped_field() -> None: + with pytest.raises(ValueError): + MappedField(field_id=1, names=[]) + + +def test_update_mapping_no_updates_or_adds(table_name_mapping_nested: NameMapping) -> None: + assert update_mapping(table_name_mapping_nested, {}, {}) == table_name_mapping_nested + + +def test_update_mapping(table_name_mapping_nested: NameMapping) -> None: + updates = {1: NestedField(1, "foo_update", StringType(), True)} + adds = { + -1: [NestedField(18, "add_18", StringType(), True)], + 15: [NestedField(19, "name", StringType(), True), NestedField(20, "add_20", StringType(), True)], + } + + 
expected = NameMapping([
+        MappedField(field_id=1, names=['foo', 'foo_update']),
+        MappedField(field_id=2, names=['bar']),
+        MappedField(field_id=3, names=['baz']),
+        MappedField(field_id=4, names=['qux'], fields=[MappedField(field_id=5, names=['element'])]),
+        MappedField(
+            field_id=6,
+            names=['quux'],
+            fields=[
+                MappedField(field_id=7, names=['key']),
+                MappedField(
+                    field_id=8,
+                    names=['value'],
+                    fields=[
+                        MappedField(field_id=9, names=['key']),
+                        MappedField(field_id=10, names=['value']),
+                    ],
+                ),
+            ],
+        ),
+        MappedField(
+            field_id=11,
+            names=['location'],
+            fields=[
+                MappedField(
+                    field_id=12,
+                    names=['element'],
+                    fields=[
+                        MappedField(field_id=13, names=['latitude']),
+                        MappedField(field_id=14, names=['longitude']),
+                    ],
+                )
+            ],
+        ),
+        MappedField(
+            field_id=15,
+            names=['person'],
+            fields=[
+                MappedField(field_id=17, names=['age']),
+                MappedField(field_id=19, names=['name']),
+                MappedField(field_id=20, names=['add_20']),
+            ],
+        ),
+        MappedField(field_id=18, names=['add_18']),
+    ])
+    assert update_mapping(table_name_mapping_nested, updates, adds) == expected
diff --git a/tests/test_schema.py b/tests/test_schema.py
index a5487b7fd9..6394b72ba6 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -18,6 +18,7 @@
 from textwrap import dedent
 from typing import Any, Dict, List
 
+import pyarrow as pa
 import pytest
 
 from pyiceberg import schema
@@ -927,7 +928,7 @@ def primitive_fields() -> List[NestedField]:
 def test_add_top_level_primitives(primitive_fields: NestedField) -> None:
     for primitive_field in primitive_fields:
         new_schema = Schema(primitive_field)
-        applied = UpdateSchema(None, schema=Schema()).union_by_name(new_schema)._apply()
+        applied = UpdateSchema(transaction=None, schema=Schema()).union_by_name(new_schema)._apply()  # type: ignore
         assert applied == new_schema
 
 
@@ -941,7 +942,7 @@ def test_add_top_level_list_of_primitives(primitive_fields: NestedField) -> None:
                 required=False,
             )
         )
-        applied = UpdateSchema(None, schema=Schema()).union_by_name(new_schema)._apply()
+        applied = UpdateSchema(transaction=None, schema=Schema()).union_by_name(new_schema)._apply()  # type: ignore
         assert applied.as_struct() == new_schema.as_struct()
 
 
@@ -957,7 +958,7 @@ def test_add_top_level_map_of_primitives(primitive_fields: NestedField) -> None:
                 required=False,
             )
         )
-        applied = UpdateSchema(None, schema=Schema()).union_by_name(new_schema)._apply()
+        applied = UpdateSchema(transaction=None, schema=Schema()).union_by_name(new_schema)._apply()  # type: ignore
         assert applied.as_struct() == new_schema.as_struct()
 
 
@@ -971,7 +972,7 @@ def test_add_top_struct_of_primitives(primitive_fields: NestedField) -> None:
                 required=False,
             )
         )
-        applied = UpdateSchema(None, schema=Schema()).union_by_name(new_schema)._apply()
+        applied = UpdateSchema(transaction=None, schema=Schema()).union_by_name(new_schema)._apply()  # type: ignore
         assert applied.as_struct() == new_schema.as_struct()
 
 
@@ -986,7 +987,7 @@ def test_add_nested_primitive(primitive_fields: NestedField) -> None:
                 required=False,
             )
         )
-        applied = UpdateSchema(None, schema=current_schema).union_by_name(new_schema)._apply()
+        applied = UpdateSchema(None, None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
         assert applied.as_struct() == new_schema.as_struct()
 
 
@@ -1006,7 +1007,7 @@ def test_add_nested_primitives(primitive_fields: NestedField) -> None:
             field_id=1, name="aStruct", field_type=StructType(*_primitive_fields(TEST_PRIMITIVE_TYPES, 2)), required=False
         )
     )
-    applied = UpdateSchema(None, schema=current_schema).union_by_name(new_schema)._apply()
+    applied = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
     assert applied.as_struct() == new_schema.as_struct()
 
 
@@ -1047,7 +1048,7 @@ def test_add_nested_lists(primitive_fields: NestedField) -> None:
             required=False,
         )
     )
-    applied = UpdateSchema(None, schema=Schema()).union_by_name(new_schema)._apply()
+    applied = UpdateSchema(transaction=None, schema=Schema()).union_by_name(new_schema)._apply()  # type: ignore
     assert applied.as_struct() == new_schema.as_struct()
 
 
@@ -1097,7 +1098,7 @@ def test_add_nested_struct(primitive_fields: NestedField) -> None:
             required=False,
        )
     )
-    applied = UpdateSchema(None, schema=Schema()).union_by_name(new_schema)._apply()
+    applied = UpdateSchema(transaction=None, schema=Schema()).union_by_name(new_schema)._apply()  # type: ignore
     assert applied.as_struct() == new_schema.as_struct()
 
 
@@ -1140,7 +1141,7 @@ def test_add_nested_maps(primitive_fields: NestedField) -> None:
             required=False,
         )
     )
-    applied = UpdateSchema(None, schema=Schema()).union_by_name(new_schema)._apply()
+    applied = UpdateSchema(transaction=None, schema=Schema()).union_by_name(new_schema)._apply()  # type: ignore
     assert applied.as_struct() == new_schema.as_struct()
 
 
@@ -1163,7 +1164,7 @@ def test_detect_invalid_top_level_list() -> None:
     )
 
     with pytest.raises(ValidationError, match="Cannot change column type: aList.element: string -> double"):
-        _ = UpdateSchema(None, schema=current_schema).union_by_name(new_schema)._apply()
+        _ = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
 
 
 def test_detect_invalid_top_level_maps() -> None:
@@ -1185,14 +1186,14 @@ def test_detect_invalid_top_level_maps() -> None:
     )
 
     with pytest.raises(ValidationError, match="Cannot change column type: aMap.key: string -> uuid"):
-        _ = UpdateSchema(None, schema=current_schema).union_by_name(new_schema)._apply()
+        _ = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
 
 
 def test_promote_float_to_double() -> None:
     current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=FloatType(), required=False))
     new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=DoubleType(), required=False))
 
-    applied = UpdateSchema(None, schema=current_schema).union_by_name(new_schema)._apply()
+    applied = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
 
     assert applied.as_struct() == new_schema.as_struct()
     assert len(applied.fields) == 1
@@ -1204,7 +1205,7 @@ def test_detect_invalid_promotion_double_to_float() -> None:
     new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=FloatType(), required=False))
 
     with pytest.raises(ValidationError, match="Cannot change column type: aCol: double -> float"):
-        _ = UpdateSchema(None, schema=current_schema).union_by_name(new_schema)._apply()
+        _ = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
 
 
 # decimal(P,S) Fixed-point decimal; precision P, scale S -> Scale is fixed [1],
@@ -1213,7 +1214,7 @@ def test_type_promote_decimal_to_fixed_scale_with_wider_precision() -> None:
     current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=DecimalType(precision=20, scale=1), required=False))
     new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=DecimalType(precision=22, scale=1), required=False))
 
-    applied = UpdateSchema(None, schema=current_schema).union_by_name(new_schema)._apply()
+    applied = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
 
     assert applied.as_struct() == new_schema.as_struct()
     assert len(applied.fields) == 1
@@ -1281,7 +1282,7 @@ def test_add_nested_structs(primitive_fields: NestedField) -> None:
             required=False,
         )
     )
-    applied = UpdateSchema(None, schema=schema).union_by_name(new_schema)._apply()
+    applied = UpdateSchema(transaction=None, schema=schema).union_by_name(new_schema)._apply()  # type: ignore
 
     expected = Schema(
         NestedField(
@@ -1321,7 +1322,7 @@ def test_replace_list_with_primitive() -> None:
     new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=StringType()))
 
     with pytest.raises(ValidationError, match="Cannot change column type: list is not a primitive"):
-        _ = UpdateSchema(None, schema=current_schema).union_by_name(new_schema)._apply()
+        _ = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
 
 
 def test_mirrored_schemas() -> None:
@@ -1344,7 +1345,7 @@ def test_mirrored_schemas() -> None:
         NestedField(9, "string6", StringType(), required=False),
     )
 
-    applied = UpdateSchema(None, schema=current_schema).union_by_name(mirrored_schema)._apply()
+    applied = UpdateSchema(transaction=None, schema=current_schema).union_by_name(mirrored_schema)._apply()  # type: ignore
 
     assert applied.as_struct() == current_schema.as_struct()
 
@@ -1396,7 +1397,7 @@ def test_add_new_top_level_struct() -> None:
         ),
     )
 
-    applied = UpdateSchema(None, schema=current_schema).union_by_name(observed_schema)._apply()
+    applied = UpdateSchema(transaction=None, schema=current_schema).union_by_name(observed_schema)._apply()  # type: ignore
 
     assert applied.as_struct() == observed_schema.as_struct()
 
@@ -1475,7 +1476,7 @@ def test_append_nested_struct() -> None:
         )
     )
 
-    applied = UpdateSchema(None, schema=current_schema).union_by_name(observed_schema)._apply()
+    applied = UpdateSchema(transaction=None, schema=current_schema).union_by_name(observed_schema)._apply()  # type: ignore
 
     assert applied.as_struct() == observed_schema.as_struct()
 
@@ -1540,7 +1541,7 @@ def test_append_nested_lists() -> None:
             required=False,
         )
     )
-    union = UpdateSchema(None, schema=current_schema).union_by_name(observed_schema)._apply()
+    union = UpdateSchema(transaction=None, schema=current_schema).union_by_name(observed_schema)._apply()  # type: ignore
 
     expected = Schema(
         NestedField(
@@ -1579,3 +1580,23 @@ def test_append_nested_lists() -> None:
     )
 
     assert union.as_struct() == expected.as_struct()
+
+
+def test_union_with_pa_schema(primitive_fields: NestedField) -> None:
+    base_schema = Schema(NestedField(field_id=1, name="foo", field_type=StringType(), required=True))
+
+    pa_schema = pa.schema([
+        pa.field("foo", pa.string(), nullable=False),
+        pa.field("bar", pa.int32(), nullable=True),
+        pa.field("baz", pa.bool_(), nullable=True),
+    ])
+
+    new_schema = UpdateSchema(transaction=None, schema=base_schema).union_by_name(pa_schema)._apply()  # type: ignore
+
+    expected_schema = Schema(
+        NestedField(field_id=1, name="foo", field_type=StringType(), required=True),
+        NestedField(field_id=2, name="bar", field_type=IntegerType(), required=False),
+        NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False),
+    )
+
+    assert new_schema == expected_schema
diff --git a/tests/utils/test_config.py b/tests/utils/test_config.py
index d694e15562..5e3f72ccc6 100644
--- a/tests/utils/test_config.py
+++ b/tests/utils/test_config.py
@@ -41,6 +41,20 @@
 def test_from_environment_variables_uppercase() -> None:
     assert Config().get_catalog_config("PRODUCTION") == {"uri": "https://service.io/api"}
 
+@mock.patch.dict(
+    os.environ,
+    {
+        "PYICEBERG_CATALOG__PRODUCTION__S3__REGION": "eu-north-1",
+        "PYICEBERG_CATALOG__PRODUCTION__S3__ACCESS_KEY_ID": "username",
+    },
+)
+def test_fix_nested_objects_from_environment_variables() -> None:
+    assert Config().get_catalog_config("PRODUCTION") == {
+        's3.region': 'eu-north-1',
+        's3.access-key-id': 'username',
+    }
+
+
 def test_from_configuration_files(tmp_path_factory: pytest.TempPathFactory) -> None:
     config_path = str(tmp_path_factory.mktemp("config"))
     with open(f"{config_path}/.pyiceberg.yaml", "w", encoding=UTF8) as file: