diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index eeee2095a2..f1938f7428 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -16,13 +16,13 @@ # under the License. mkdocs==1.6.0 -griffe==0.47.0 +griffe==0.48.0 jinja2==3.1.4 mkdocstrings==0.25.1 mkdocstrings-python==1.10.5 mkdocs-literate-nav==0.6.1 mkdocs-autorefs==1.0.1 mkdocs-gen-files==0.5.0 -mkdocs-material==9.5.28 +mkdocs-material==9.5.29 mkdocs-material-extensions==1.3.1 mkdocs-section-index==0.3.9 diff --git a/poetry.lock b/poetry.lock index a32ec2a2af..f114cc7e57 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4380,57 +4380,108 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-it [[package]] name = "zstandard" -version = "0.22.0" +version = "0.23.0" description = "Zstandard bindings for Python" optional = false python-versions = ">=3.8" files = [ - {file = "zstandard-0.22.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:275df437ab03f8c033b8a2c181e51716c32d831082d93ce48002a5227ec93019"}, - {file = "zstandard-0.22.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2ac9957bc6d2403c4772c890916bf181b2653640da98f32e04b96e4d6fb3252a"}, - {file = "zstandard-0.22.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe3390c538f12437b859d815040763abc728955a52ca6ff9c5d4ac707c4ad98e"}, - {file = "zstandard-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1958100b8a1cc3f27fa21071a55cb2ed32e9e5df4c3c6e661c193437f171cba2"}, - {file = "zstandard-0.22.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93e1856c8313bc688d5df069e106a4bc962eef3d13372020cc6e3ebf5e045202"}, - {file = "zstandard-0.22.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1a90ba9a4c9c884bb876a14be2b1d216609385efb180393df40e5172e7ecf356"}, - {file = "zstandard-0.22.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3db41c5e49ef73641d5111554e1d1d3af106410a6c1fb52cf68912ba7a343a0d"}, - {file = "zstandard-0.22.0-cp310-cp310-win32.whl", hash = "sha256:d8593f8464fb64d58e8cb0b905b272d40184eac9a18d83cf8c10749c3eafcd7e"}, - {file = "zstandard-0.22.0-cp310-cp310-win_amd64.whl", hash = "sha256:f1a4b358947a65b94e2501ce3e078bbc929b039ede4679ddb0460829b12f7375"}, - {file = "zstandard-0.22.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:589402548251056878d2e7c8859286eb91bd841af117dbe4ab000e6450987e08"}, - {file = "zstandard-0.22.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a97079b955b00b732c6f280d5023e0eefe359045e8b83b08cf0333af9ec78f26"}, - {file = "zstandard-0.22.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:445b47bc32de69d990ad0f34da0e20f535914623d1e506e74d6bc5c9dc40bb09"}, - {file = "zstandard-0.22.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33591d59f4956c9812f8063eff2e2c0065bc02050837f152574069f5f9f17775"}, - {file = "zstandard-0.22.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:888196c9c8893a1e8ff5e89b8f894e7f4f0e64a5af4d8f3c410f0319128bb2f8"}, - {file = "zstandard-0.22.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:53866a9d8ab363271c9e80c7c2e9441814961d47f88c9bc3b248142c32141d94"}, - {file = "zstandard-0.22.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4ac59d5d6910b220141c1737b79d4a5aa9e57466e7469a012ed42ce2d3995e88"}, - {file = "zstandard-0.22.0-cp311-cp311-win32.whl", hash = 
"sha256:2b11ea433db22e720758cba584c9d661077121fcf60ab43351950ded20283440"}, - {file = "zstandard-0.22.0-cp311-cp311-win_amd64.whl", hash = "sha256:11f0d1aab9516a497137b41e3d3ed4bbf7b2ee2abc79e5c8b010ad286d7464bd"}, - {file = "zstandard-0.22.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6c25b8eb733d4e741246151d895dd0308137532737f337411160ff69ca24f93a"}, - {file = "zstandard-0.22.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f9b2cde1cd1b2a10246dbc143ba49d942d14fb3d2b4bccf4618d475c65464912"}, - {file = "zstandard-0.22.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a88b7df61a292603e7cd662d92565d915796b094ffb3d206579aaebac6b85d5f"}, - {file = "zstandard-0.22.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:466e6ad8caefb589ed281c076deb6f0cd330e8bc13c5035854ffb9c2014b118c"}, - {file = "zstandard-0.22.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a1d67d0d53d2a138f9e29d8acdabe11310c185e36f0a848efa104d4e40b808e4"}, - {file = "zstandard-0.22.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:39b2853efc9403927f9065cc48c9980649462acbdf81cd4f0cb773af2fd734bc"}, - {file = "zstandard-0.22.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8a1b2effa96a5f019e72874969394edd393e2fbd6414a8208fea363a22803b45"}, - {file = "zstandard-0.22.0-cp312-cp312-win32.whl", hash = "sha256:88c5b4b47a8a138338a07fc94e2ba3b1535f69247670abfe422de4e0b344aae2"}, - {file = "zstandard-0.22.0-cp312-cp312-win_amd64.whl", hash = "sha256:de20a212ef3d00d609d0b22eb7cc798d5a69035e81839f549b538eff4105d01c"}, - {file = "zstandard-0.22.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d75f693bb4e92c335e0645e8845e553cd09dc91616412d1d4650da835b5449df"}, - {file = "zstandard-0.22.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:36a47636c3de227cd765e25a21dc5dace00539b82ddd99ee36abae38178eff9e"}, - {file = "zstandard-0.22.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68953dc84b244b053c0d5f137a21ae8287ecf51b20872eccf8eaac0302d3e3b0"}, - {file = "zstandard-0.22.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2612e9bb4977381184bb2463150336d0f7e014d6bb5d4a370f9a372d21916f69"}, - {file = "zstandard-0.22.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23d2b3c2b8e7e5a6cb7922f7c27d73a9a615f0a5ab5d0e03dd533c477de23004"}, - {file = "zstandard-0.22.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1d43501f5f31e22baf822720d82b5547f8a08f5386a883b32584a185675c8fbf"}, - {file = "zstandard-0.22.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a493d470183ee620a3df1e6e55b3e4de8143c0ba1b16f3ded83208ea8ddfd91d"}, - {file = "zstandard-0.22.0-cp38-cp38-win32.whl", hash = "sha256:7034d381789f45576ec3f1fa0e15d741828146439228dc3f7c59856c5bcd3292"}, - {file = "zstandard-0.22.0-cp38-cp38-win_amd64.whl", hash = "sha256:d8fff0f0c1d8bc5d866762ae95bd99d53282337af1be9dc0d88506b340e74b73"}, - {file = "zstandard-0.22.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2fdd53b806786bd6112d97c1f1e7841e5e4daa06810ab4b284026a1a0e484c0b"}, - {file = "zstandard-0.22.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:73a1d6bd01961e9fd447162e137ed949c01bdb830dfca487c4a14e9742dccc93"}, - {file = "zstandard-0.22.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9501f36fac6b875c124243a379267d879262480bf85b1dbda61f5ad4d01b75a3"}, - {file = 
"zstandard-0.22.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48f260e4c7294ef275744210a4010f116048e0c95857befb7462e033f09442fe"}, - {file = "zstandard-0.22.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:959665072bd60f45c5b6b5d711f15bdefc9849dd5da9fb6c873e35f5d34d8cfb"}, - {file = "zstandard-0.22.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d22fdef58976457c65e2796e6730a3ea4a254f3ba83777ecfc8592ff8d77d303"}, - {file = "zstandard-0.22.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a7ccf5825fd71d4542c8ab28d4d482aace885f5ebe4b40faaa290eed8e095a4c"}, - {file = "zstandard-0.22.0-cp39-cp39-win32.whl", hash = "sha256:f058a77ef0ece4e210bb0450e68408d4223f728b109764676e1a13537d056bb0"}, - {file = "zstandard-0.22.0-cp39-cp39-win_amd64.whl", hash = "sha256:e9e9d4e2e336c529d4c435baad846a181e39a982f823f7e4495ec0b0ec8538d2"}, - {file = "zstandard-0.22.0.tar.gz", hash = "sha256:8226a33c542bcb54cd6bd0a366067b610b41713b64c9abec1bc4533d69f51e70"}, + {file = "zstandard-0.23.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bf0a05b6059c0528477fba9054d09179beb63744355cab9f38059548fedd46a9"}, + {file = "zstandard-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fc9ca1c9718cb3b06634c7c8dec57d24e9438b2aa9a0f02b8bb36bf478538880"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77da4c6bfa20dd5ea25cbf12c76f181a8e8cd7ea231c673828d0386b1740b8dc"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2170c7e0367dde86a2647ed5b6f57394ea7f53545746104c6b09fc1f4223573"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c16842b846a8d2a145223f520b7e18b57c8f476924bda92aeee3a88d11cfc391"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:157e89ceb4054029a289fb504c98c6a9fe8010f1680de0201b3eb5dc20aa6d9e"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:203d236f4c94cd8379d1ea61db2fce20730b4c38d7f1c34506a31b34edc87bdd"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:dc5d1a49d3f8262be192589a4b72f0d03b72dcf46c51ad5852a4fdc67be7b9e4"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:752bf8a74412b9892f4e5b58f2f890a039f57037f52c89a740757ebd807f33ea"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:80080816b4f52a9d886e67f1f96912891074903238fe54f2de8b786f86baded2"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:84433dddea68571a6d6bd4fbf8ff398236031149116a7fff6f777ff95cad3df9"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ab19a2d91963ed9e42b4e8d77cd847ae8381576585bad79dbd0a8837a9f6620a"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:59556bf80a7094d0cfb9f5e50bb2db27fefb75d5138bb16fb052b61b0e0eeeb0"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:27d3ef2252d2e62476389ca8f9b0cf2bbafb082a3b6bfe9d90cbcbb5529ecf7c"}, + {file = "zstandard-0.23.0-cp310-cp310-win32.whl", hash = "sha256:5d41d5e025f1e0bccae4928981e71b2334c60f580bdc8345f824e7c0a4c2a813"}, + {file = "zstandard-0.23.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:519fbf169dfac1222a76ba8861ef4ac7f0530c35dd79ba5727014613f91613d4"}, + {file = "zstandard-0.23.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:34895a41273ad33347b2fc70e1bff4240556de3c46c6ea430a7ed91f9042aa4e"}, + {file = "zstandard-0.23.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:77ea385f7dd5b5676d7fd943292ffa18fbf5c72ba98f7d09fc1fb9e819b34c23"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:983b6efd649723474f29ed42e1467f90a35a74793437d0bc64a5bf482bedfa0a"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80a539906390591dd39ebb8d773771dc4db82ace6372c4d41e2d293f8e32b8db"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:445e4cb5048b04e90ce96a79b4b63140e3f4ab5f662321975679b5f6360b90e2"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd30d9c67d13d891f2360b2a120186729c111238ac63b43dbd37a5a40670b8ca"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d20fd853fbb5807c8e84c136c278827b6167ded66c72ec6f9a14b863d809211c"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ed1708dbf4d2e3a1c5c69110ba2b4eb6678262028afd6c6fbcc5a8dac9cda68e"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:be9b5b8659dff1f913039c2feee1aca499cfbc19e98fa12bc85e037c17ec6ca5"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:65308f4b4890aa12d9b6ad9f2844b7ee42c7f7a4fd3390425b242ffc57498f48"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:98da17ce9cbf3bfe4617e836d561e433f871129e3a7ac16d6ef4c680f13a839c"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:8ed7d27cb56b3e058d3cf684d7200703bcae623e1dcc06ed1e18ecda39fee003"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:b69bb4f51daf461b15e7b3db033160937d3ff88303a7bc808c67bbc1eaf98c78"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:034b88913ecc1b097f528e42b539453fa82c3557e414b3de9d5632c80439a473"}, + {file = "zstandard-0.23.0-cp311-cp311-win32.whl", hash = "sha256:f2d4380bf5f62daabd7b751ea2339c1a21d1c9463f1feb7fc2bdcea2c29c3160"}, + {file = "zstandard-0.23.0-cp311-cp311-win_amd64.whl", hash = "sha256:62136da96a973bd2557f06ddd4e8e807f9e13cbb0bfb9cc06cfe6d98ea90dfe0"}, + {file = "zstandard-0.23.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b4567955a6bc1b20e9c31612e615af6b53733491aeaa19a6b3b37f3b65477094"}, + {file = "zstandard-0.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e172f57cd78c20f13a3415cc8dfe24bf388614324d25539146594c16d78fcc8"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0e166f698c5a3e914947388c162be2583e0c638a4703fc6a543e23a88dea3c1"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12a289832e520c6bd4dcaad68e944b86da3bad0d339ef7989fb7e88f92e96072"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d50d31bfedd53a928fed6707b15a8dbeef011bb6366297cc435accc888b27c20"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:72c68dda124a1a138340fb62fa21b9bf4848437d9ca60bd35db36f2d3345f373"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53dd9d5e3d29f95acd5de6802e909ada8d8d8cfa37a3ac64836f3bc4bc5512db"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6a41c120c3dbc0d81a8e8adc73312d668cd34acd7725f036992b1b72d22c1772"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:40b33d93c6eddf02d2c19f5773196068d875c41ca25730e8288e9b672897c105"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9206649ec587e6b02bd124fb7799b86cddec350f6f6c14bc82a2b70183e708ba"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76e79bc28a65f467e0409098fa2c4376931fd3207fbeb6b956c7c476d53746dd"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:66b689c107857eceabf2cf3d3fc699c3c0fe8ccd18df2219d978c0283e4c508a"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9c236e635582742fee16603042553d276cca506e824fa2e6489db04039521e90"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8fffdbd9d1408006baaf02f1068d7dd1f016c6bcb7538682622c556e7b68e35"}, + {file = "zstandard-0.23.0-cp312-cp312-win32.whl", hash = "sha256:dc1d33abb8a0d754ea4763bad944fd965d3d95b5baef6b121c0c9013eaf1907d"}, + {file = "zstandard-0.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:64585e1dba664dc67c7cdabd56c1e5685233fbb1fc1966cfba2a340ec0dfff7b"}, + {file = "zstandard-0.23.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:576856e8594e6649aee06ddbfc738fec6a834f7c85bf7cadd1c53d4a58186ef9"}, + {file = "zstandard-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38302b78a850ff82656beaddeb0bb989a0322a8bbb1bf1ab10c17506681d772a"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2240ddc86b74966c34554c49d00eaafa8200a18d3a5b6ffbf7da63b11d74ee2"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ef230a8fd217a2015bc91b74f6b3b7d6522ba48be29ad4ea0ca3a3775bf7dd5"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:774d45b1fac1461f48698a9d4b5fa19a69d47ece02fa469825b442263f04021f"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f77fa49079891a4aab203d0b1744acc85577ed16d767b52fc089d83faf8d8ed"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac184f87ff521f4840e6ea0b10c0ec90c6b1dcd0bad2f1e4a9a1b4fa177982ea"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c363b53e257246a954ebc7c488304b5592b9c53fbe74d03bc1c64dda153fb847"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e7792606d606c8df5277c32ccb58f29b9b8603bf83b48639b7aedf6df4fe8171"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a0817825b900fcd43ac5d05b8b3079937073d2b1ff9cf89427590718b70dd840"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:9da6bc32faac9a293ddfdcb9108d4b20416219461e4ec64dfea8383cac186690"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fd7699e8fd9969f455ef2926221e0233f81a2542921471382e77a9e2f2b57f4b"}, + {file = 
"zstandard-0.23.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d477ed829077cd945b01fc3115edd132c47e6540ddcd96ca169facff28173057"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa6ce8b52c5987b3e34d5674b0ab529a4602b632ebab0a93b07bfb4dfc8f8a33"}, + {file = "zstandard-0.23.0-cp313-cp313-win32.whl", hash = "sha256:a9b07268d0c3ca5c170a385a0ab9fb7fdd9f5fd866be004c4ea39e44edce47dd"}, + {file = "zstandard-0.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:f3513916e8c645d0610815c257cbfd3242adfd5c4cfa78be514e5a3ebb42a41b"}, + {file = "zstandard-0.23.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2ef3775758346d9ac6214123887d25c7061c92afe1f2b354f9388e9e4d48acfc"}, + {file = "zstandard-0.23.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4051e406288b8cdbb993798b9a45c59a4896b6ecee2f875424ec10276a895740"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2d1a054f8f0a191004675755448d12be47fa9bebbcffa3cdf01db19f2d30a54"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f83fa6cae3fff8e98691248c9320356971b59678a17f20656a9e59cd32cee6d8"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32ba3b5ccde2d581b1e6aa952c836a6291e8435d788f656fe5976445865ae045"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f146f50723defec2975fb7e388ae3a024eb7151542d1599527ec2aa9cacb152"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1bfe8de1da6d104f15a60d4a8a768288f66aa953bbe00d027398b93fb9680b26"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:29a2bc7c1b09b0af938b7a8343174b987ae021705acabcbae560166567f5a8db"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:61f89436cbfede4bc4e91b4397eaa3e2108ebe96d05e93d6ccc95ab5714be512"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:53ea7cdc96c6eb56e76bb06894bcfb5dfa93b7adcf59d61c6b92674e24e2dd5e"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:a4ae99c57668ca1e78597d8b06d5af837f377f340f4cce993b551b2d7731778d"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:379b378ae694ba78cef921581ebd420c938936a153ded602c4fea612b7eaa90d"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:50a80baba0285386f97ea36239855f6020ce452456605f262b2d33ac35c7770b"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:61062387ad820c654b6a6b5f0b94484fa19515e0c5116faf29f41a6bc91ded6e"}, + {file = "zstandard-0.23.0-cp38-cp38-win32.whl", hash = "sha256:b8c0bd73aeac689beacd4e7667d48c299f61b959475cdbb91e7d3d88d27c56b9"}, + {file = "zstandard-0.23.0-cp38-cp38-win_amd64.whl", hash = "sha256:a05e6d6218461eb1b4771d973728f0133b2a4613a6779995df557f70794fd60f"}, + {file = "zstandard-0.23.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3aa014d55c3af933c1315eb4bb06dd0459661cc0b15cd61077afa6489bec63bb"}, + {file = "zstandard-0.23.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7f0804bb3799414af278e9ad51be25edf67f78f916e08afdb983e74161b916"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb2b1ecfef1e67897d336de3a0e3f52478182d6a47eda86cbd42504c5cbd009a"}, + {file = 
"zstandard-0.23.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:837bb6764be6919963ef41235fd56a6486b132ea64afe5fafb4cb279ac44f259"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1516c8c37d3a053b01c1c15b182f3b5f5eef19ced9b930b684a73bad121addf4"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48ef6a43b1846f6025dde6ed9fee0c24e1149c1c25f7fb0a0585572b2f3adc58"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11e3bf3c924853a2d5835b24f03eeba7fc9b07d8ca499e247e06ff5676461a15"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2fb4535137de7e244c230e24f9d1ec194f61721c86ebea04e1581d9d06ea1269"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8c24f21fa2af4bb9f2c492a86fe0c34e6d2c63812a839590edaf177b7398f700"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a8c86881813a78a6f4508ef9daf9d4995b8ac2d147dcb1a450448941398091c9"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fe3b385d996ee0822fd46528d9f0443b880d4d05528fd26a9119a54ec3f91c69"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:82d17e94d735c99621bf8ebf9995f870a6b3e6d14543b99e201ae046dfe7de70"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c7c517d74bea1a6afd39aa612fa025e6b8011982a0897768a2f7c8ab4ebb78a2"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1fd7e0f1cfb70eb2f95a19b472ee7ad6d9a0a992ec0ae53286870c104ca939e5"}, + {file = "zstandard-0.23.0-cp39-cp39-win32.whl", hash = "sha256:43da0f0092281bf501f9c5f6f3b4c975a8a0ea82de49ba3f7100e64d422a1274"}, + {file = "zstandard-0.23.0-cp39-cp39-win_amd64.whl", hash = "sha256:f8346bfa098532bc1fb6c7ef06783e969d87a99dd1d2a5a18a892c1d7a643c58"}, + {file = "zstandard-0.23.0.tar.gz", hash = "sha256:b2d8c62d08e7255f68f7a740bae85b3c9b8e5466baa9cbf7f57f1cde0ac6bc09"}, ] [package.dependencies] diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 62b887bc47..2451bf7df7 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -124,6 +124,7 @@ Schema, SchemaVisitorPerPrimitiveType, SchemaWithPartnerVisitor, + _check_schema_compatible, pre_order_visit, promote, prune_columns, @@ -1413,7 +1414,7 @@ def list(self, list_type: ListType, list_array: Optional[pa.Array], value_array: # This can be removed once this has been fixed: # https://github.com/apache/arrow/issues/38809 list_array = pa.LargeListArray.from_arrays(list_array.offsets, value_array) - + value_array = self._cast_if_needed(list_type.element_field, value_array) arrow_field = pa.large_list(self._construct_field(list_type.element_field, value_array.type)) return list_array.cast(arrow_field) else: @@ -1423,6 +1424,8 @@ def map( self, map_type: MapType, map_array: Optional[pa.Array], key_result: Optional[pa.Array], value_result: Optional[pa.Array] ) -> Optional[pa.Array]: if isinstance(map_array, pa.MapArray) and key_result is not None and value_result is not None: + key_result = self._cast_if_needed(map_type.key_field, key_result) + value_result = self._cast_if_needed(map_type.value_field, value_result) arrow_field = pa.map_( self._construct_field(map_type.key_field, key_result.type), self._construct_field(map_type.value_field, value_result.type), @@ -1555,9 +1558,16 @@ def __init__(self, 
iceberg_type: PrimitiveType, physical_type_string: str, trunc expected_physical_type = _primitive_to_physical(iceberg_type) if expected_physical_type != physical_type_string: - raise ValueError( - f"Unexpected physical type {physical_type_string} for {iceberg_type}, expected {expected_physical_type}" - ) + # Allow promotable physical types + # INT32 -> INT64 and FLOAT -> DOUBLE are safe type casts + if (physical_type_string == "INT32" and expected_physical_type == "INT64") or ( + physical_type_string == "FLOAT" and expected_physical_type == "DOUBLE" + ): + pass + else: + raise ValueError( + f"Unexpected physical type {physical_type_string} for {iceberg_type}, expected {expected_physical_type}" + ) self.primitive_type = iceberg_type @@ -1902,16 +1912,6 @@ def data_file_statistics_from_parquet_metadata( set the mode for column metrics collection parquet_column_mapping (Dict[str, int]): The mapping of the parquet file name to the field ID """ - if parquet_metadata.num_columns != len(stats_columns): - raise ValueError( - f"Number of columns in statistics configuration ({len(stats_columns)}) is different from the number of columns in pyarrow table ({parquet_metadata.num_columns})" - ) - - if parquet_metadata.num_columns != len(parquet_column_mapping): - raise ValueError( - f"Number of columns in column mapping ({len(parquet_column_mapping)}) is different from the number of columns in pyarrow table ({parquet_metadata.num_columns})" - ) - column_sizes: Dict[int, int] = {} value_counts: Dict[int, int] = {} split_offsets: List[int] = [] @@ -2004,8 +2004,7 @@ def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteT ) def write_parquet(task: WriteTask) -> DataFile: - table_schema = task.schema - + table_schema = table_metadata.schema() # if schema needs to be transformed, use the transformed schema and adjust the arrow table accordingly # otherwise use the original schema if (sanitized_schema := sanitize_column_names(table_schema)) != table_schema: @@ -2017,7 +2016,7 @@ def write_parquet(task: WriteTask) -> DataFile: batches = [ _to_requested_schema( requested_schema=file_schema, - file_schema=table_schema, + file_schema=task.schema, batch=batch, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us, include_field_ids=True, @@ -2076,47 +2075,30 @@ def bin_pack_arrow_table(tbl: pa.Table, target_file_size: int) -> Iterator[List[ return bin_packed_record_batches -def _check_schema_compatible(table_schema: Schema, other_schema: pa.Schema, downcast_ns_timestamp_to_us: bool = False) -> None: +def _check_pyarrow_schema_compatible( + requested_schema: Schema, provided_schema: pa.Schema, downcast_ns_timestamp_to_us: bool = False +) -> None: """ - Check if the `table_schema` is compatible with `other_schema`. + Check if the `requested_schema` is compatible with `provided_schema`. Two schemas are considered compatible when they are equal in terms of the Iceberg Schema type. Raises: ValueError: If the schemas are not compatible. 
""" - name_mapping = table_schema.name_mapping + name_mapping = requested_schema.name_mapping try: - task_schema = pyarrow_to_schema( - other_schema, name_mapping=name_mapping, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us + provided_schema = pyarrow_to_schema( + provided_schema, name_mapping=name_mapping, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us ) except ValueError as e: - other_schema = _pyarrow_to_schema_without_ids(other_schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us) - additional_names = set(other_schema.column_names) - set(table_schema.column_names) + provided_schema = _pyarrow_to_schema_without_ids(provided_schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us) + additional_names = set(provided_schema._name_to_id.keys()) - set(requested_schema._name_to_id.keys()) raise ValueError( f"PyArrow table contains more columns: {', '.join(sorted(additional_names))}. Update the schema first (hint, use union_by_name)." ) from e - if table_schema.as_struct() != task_schema.as_struct(): - from rich.console import Console - from rich.table import Table as RichTable - - console = Console(record=True) - - rich_table = RichTable(show_header=True, header_style="bold") - rich_table.add_column("") - rich_table.add_column("Table field") - rich_table.add_column("Dataframe field") - - for lhs in table_schema.fields: - try: - rhs = task_schema.find_field(lhs.field_id) - rich_table.add_row("✅" if lhs == rhs else "❌", str(lhs), str(rhs)) - except ValueError: - rich_table.add_row("❌", str(lhs), "Missing") - - console.print(rich_table) - raise ValueError(f"Mismatch in fields:\n{console.export_text()}") + _check_schema_compatible(requested_schema, provided_schema) def parquet_files_to_data_files(io: FileIO, table_metadata: TableMetadata, file_paths: Iterator[str]) -> Iterator[DataFile]: @@ -2130,7 +2112,7 @@ def parquet_files_to_data_files(io: FileIO, table_metadata: TableMetadata, file_ f"Cannot add file {file_path} because it has field IDs. `add_files` only supports addition of files without field_ids" ) schema = table_metadata.schema() - _check_schema_compatible(schema, parquet_metadata.schema.to_arrow_schema()) + _check_pyarrow_schema_compatible(schema, parquet_metadata.schema.to_arrow_schema()) statistics = data_file_statistics_from_parquet_metadata( parquet_metadata=parquet_metadata, @@ -2211,7 +2193,7 @@ def _dataframe_to_data_files( Returns: An iterable that supplies datafiles that represent the table. 
""" - from pyiceberg.table import PropertyUtil, TableProperties, WriteTask + from pyiceberg.table import DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE, PropertyUtil, TableProperties, WriteTask counter = counter or itertools.count(0) write_uuid = write_uuid or uuid.uuid4() @@ -2220,13 +2202,16 @@ def _dataframe_to_data_files( property_name=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES, default=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, ) + name_mapping = table_metadata.schema().name_mapping + downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False + task_schema = pyarrow_to_schema(df.schema, name_mapping=name_mapping, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us) if table_metadata.spec().is_unpartitioned(): yield from write_file( io=io, table_metadata=table_metadata, tasks=iter([ - WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=table_metadata.schema()) + WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=task_schema) for batches in bin_pack_arrow_table(df, target_file_size) ]), ) @@ -2241,7 +2226,7 @@ def _dataframe_to_data_files( task_id=next(counter), record_batches=batches, partition_key=partition.partition_key, - schema=table_metadata.schema(), + schema=task_schema, ) for partition in partitions for batches in bin_pack_arrow_table(partition.arrow_table_partition, target_file_size) diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py index 77f1addbf5..cfe3fe3a7b 100644 --- a/pyiceberg/schema.py +++ b/pyiceberg/schema.py @@ -1616,3 +1616,103 @@ def _(file_type: FixedType, read_type: IcebergType) -> IcebergType: return read_type else: raise ResolveError(f"Cannot promote {file_type} to {read_type}") + + +def _check_schema_compatible(requested_schema: Schema, provided_schema: Schema) -> None: + """ + Check if the `provided_schema` is compatible with `requested_schema`. + + Both Schemas must have valid IDs and share the same ID for the same field names. + + Two schemas are considered compatible when: + 1. All `required` fields in `requested_schema` are present and are also `required` in the `provided_schema` + 2. Field Types are consistent for fields that are present in both schemas. I.e. the field type + in the `provided_schema` can be promoted to the field type of the same field ID in `requested_schema` + + Raises: + ValueError: If the schemas are not compatible. + """ + pre_order_visit(requested_schema, _SchemaCompatibilityVisitor(provided_schema)) + + +class _SchemaCompatibilityVisitor(PreOrderSchemaVisitor[bool]): + provided_schema: Schema + + def __init__(self, provided_schema: Schema): + from rich.console import Console + from rich.table import Table as RichTable + + self.provided_schema = provided_schema + self.rich_table = RichTable(show_header=True, header_style="bold") + self.rich_table.add_column("") + self.rich_table.add_column("Table field") + self.rich_table.add_column("Dataframe field") + self.console = Console(record=True) + + def _is_field_compatible(self, lhs: NestedField) -> bool: + # Validate nullability first. 
+ # An optional field can be missing in the provided schema + # But a required field must exist as a required field + try: + rhs = self.provided_schema.find_field(lhs.field_id) + except ValueError: + if lhs.required: + self.rich_table.add_row("❌", str(lhs), "Missing") + return False + else: + self.rich_table.add_row("✅", str(lhs), "Missing") + return True + + if lhs.required and not rhs.required: + self.rich_table.add_row("❌", str(lhs), str(rhs)) + return False + + # Check type compatibility + if lhs.field_type == rhs.field_type: + self.rich_table.add_row("✅", str(lhs), str(rhs)) + return True + # We only check that the parent node is also of the same type. + # We check the type of the child nodes when we traverse them later. + elif any( + (isinstance(lhs.field_type, container_type) and isinstance(rhs.field_type, container_type)) + for container_type in {StructType, MapType, ListType} + ): + self.rich_table.add_row("✅", str(lhs), str(rhs)) + return True + else: + try: + # If type can be promoted to the requested schema + # it is considered compatible + promote(rhs.field_type, lhs.field_type) + self.rich_table.add_row("✅", str(lhs), str(rhs)) + return True + except ResolveError: + self.rich_table.add_row("❌", str(lhs), str(rhs)) + return False + + def schema(self, schema: Schema, struct_result: Callable[[], bool]) -> bool: + if not (result := struct_result()): + self.console.print(self.rich_table) + raise ValueError(f"Mismatch in fields:\n{self.console.export_text()}") + return result + + def struct(self, struct: StructType, field_results: List[Callable[[], bool]]) -> bool: + results = [result() for result in field_results] + return all(results) + + def field(self, field: NestedField, field_result: Callable[[], bool]) -> bool: + return self._is_field_compatible(field) and field_result() + + def list(self, list_type: ListType, element_result: Callable[[], bool]) -> bool: + return self._is_field_compatible(list_type.element_field) and element_result() + + def map(self, map_type: MapType, key_result: Callable[[], bool], value_result: Callable[[], bool]) -> bool: + return all([ + self._is_field_compatible(map_type.key_field), + self._is_field_compatible(map_type.value_field), + key_result(), + value_result(), + ]) + + def primitive(self, primitive: PrimitiveType) -> bool: + return True diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 4164280a24..0cbe4630e4 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -73,7 +73,6 @@ manifest_evaluator, ) from pyiceberg.io import FileIO, OutputFile, load_file_io -from pyiceberg.io.pyarrow import _check_schema_compatible, _dataframe_to_data_files, expression_to_pyarrow, project_table from pyiceberg.manifest import ( POSITIONAL_DELETE_SCHEMA, DataFile, @@ -478,6 +477,8 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) except ModuleNotFoundError as e: raise ModuleNotFoundError("For writes PyArrow needs to be installed") from e + from pyiceberg.io.pyarrow import _check_pyarrow_schema_compatible, _dataframe_to_data_files + if not isinstance(df, pa.Table): raise ValueError(f"Expected PyArrow table, got: {df}") @@ -488,8 +489,8 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) f"Not all partition types are supported for writes. Following partitions cannot be written using pyarrow: {unsupported_partitions}."
) downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False - _check_schema_compatible( - self._table.schema(), other_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us + _check_pyarrow_schema_compatible( + self._table.schema(), provided_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us ) manifest_merge_enabled = PropertyUtil.property_as_bool( @@ -535,6 +536,8 @@ def overwrite( except ModuleNotFoundError as e: raise ModuleNotFoundError("For writes PyArrow needs to be installed") from e + from pyiceberg.io.pyarrow import _check_pyarrow_schema_compatible, _dataframe_to_data_files + if not isinstance(df, pa.Table): raise ValueError(f"Expected PyArrow table, got: {df}") @@ -545,8 +548,8 @@ def overwrite( f"Not all partition types are supported for writes. Following partitions cannot be written using pyarrow: {unsupported_partitions}." ) downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False - _check_schema_compatible( - self._table.schema(), other_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us + _check_pyarrow_schema_compatible( + self._table.schema(), provided_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us ) self.delete(delete_filter=overwrite_filter, snapshot_properties=snapshot_properties) @@ -573,6 +576,8 @@ def delete(self, delete_filter: Union[str, BooleanExpression], snapshot_properti delete_filter: A boolean expression to delete rows from a table snapshot_properties: Custom properties to be added to the snapshot summary """ + from pyiceberg.io.pyarrow import _dataframe_to_data_files, expression_to_pyarrow, project_table + if ( self.table_metadata.properties.get(TableProperties.DELETE_MODE, TableProperties.DELETE_MODE_DEFAULT) == TableProperties.DELETE_MODE_MERGE_ON_READ diff --git a/pyiceberg/table/name_mapping.py b/pyiceberg/table/name_mapping.py index 5a4e769003..cb9f72bf97 100644 --- a/pyiceberg/table/name_mapping.py +++ b/pyiceberg/table/name_mapping.py @@ -37,7 +37,7 @@ class MappedField(IcebergBaseModel): field_id: int = Field(alias="field-id") - names: List[str] = conlist(str, min_length=1) + names: List[str] = conlist(str) fields: List[MappedField] = Field(default_factory=list) @field_validator("fields", mode="before") @@ -45,18 +45,6 @@ class MappedField(IcebergBaseModel): def convert_null_to_empty_List(cls, v: Any) -> Any: return v or [] - @field_validator("names", mode="after") - @classmethod - def check_at_least_one(cls, v: List[str]) -> Any: - """ - Conlist constraint does not seem to be validating the class on instantiation. - - Adding a custom validator to enforce min_length=1 constraint. 
- """ - if len(v) < 1: - raise ValueError("At least one mapped name must be provided for the field") - return v - @model_serializer def ser_model(self) -> Dict[str, Any]: """Set custom serializer to leave out the field when it is empty.""" diff --git a/tests/conftest.py b/tests/conftest.py index 91ab8f2e56..7f9a2bcfa8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2506,3 +2506,62 @@ def table_schema_with_all_microseconds_timestamp_precision() -> Schema: NestedField(field_id=10, name="timestamptz_ns_z", field_type=TimestamptzType(), required=False), NestedField(field_id=11, name="timestamptz_s_0000", field_type=TimestamptzType(), required=False), ) + + +@pytest.fixture(scope="session") +def table_schema_with_promoted_types() -> Schema: + """Iceberg table Schema with longs, doubles and uuid in simple and nested types.""" + return Schema( + NestedField(field_id=1, name="long", field_type=LongType(), required=False), + NestedField( + field_id=2, + name="list", + field_type=ListType(element_id=4, element_type=LongType(), element_required=False), + required=True, + ), + NestedField( + field_id=3, + name="map", + field_type=MapType( + key_id=5, + key_type=StringType(), + value_id=6, + value_type=LongType(), + value_required=False, + ), + required=True, + ), + NestedField(field_id=7, name="double", field_type=DoubleType(), required=False), + NestedField(field_id=8, name="uuid", field_type=UUIDType(), required=False), + ) + + +@pytest.fixture(scope="session") +def pyarrow_schema_with_promoted_types() -> "pa.Schema": + """Pyarrow Schema with longs, doubles and uuid in simple and nested types.""" + import pyarrow as pa + + return pa.schema(( + pa.field("long", pa.int32(), nullable=True), # can support upcasting integer to long + pa.field("list", pa.list_(pa.int32()), nullable=False), # can support upcasting integer to long + pa.field("map", pa.map_(pa.string(), pa.int32()), nullable=False), # can support upcasting integer to long + pa.field("double", pa.float32(), nullable=True), # can support upcasting float to double + pa.field("uuid", pa.binary(length=16), nullable=True), # can support upcasting float to double + )) + + +@pytest.fixture(scope="session") +def pyarrow_table_with_promoted_types(pyarrow_schema_with_promoted_types: "pa.Schema") -> "pa.Table": + """Pyarrow table with longs, doubles and uuid in simple and nested types.""" + import pyarrow as pa + + return pa.Table.from_pydict( + { + "long": [1, 9], + "list": [[1, 1], [2, 2]], + "map": [{"a": 1}, {"b": 2}], + "double": [1.1, 9.2], + "uuid": [b"qZx\xefNS@\x89\x9b\xf9:\xd0\xee\x9b\xf5E", b"\x97]\x87T^JDJ\x96\x97\xf4v\xe4\x03\x0c\xde"], + }, + schema=pyarrow_schema_with_promoted_types, + ) diff --git a/tests/integration/test_add_files.py b/tests/integration/test_add_files.py index b8fd6d0926..3703a9e0b6 100644 --- a/tests/integration/test_add_files.py +++ b/tests/integration/test_add_files.py @@ -30,6 +30,7 @@ from pyiceberg.catalog import Catalog from pyiceberg.exceptions import NoSuchTableError from pyiceberg.io import FileIO +from pyiceberg.io.pyarrow import _pyarrow_schema_ensure_large_types from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionField, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.table import Table @@ -38,6 +39,7 @@ BooleanType, DateType, IntegerType, + LongType, NestedField, StringType, TimestamptzType, @@ -505,7 +507,7 @@ def test_add_files_fails_on_schema_mismatch(spark: SparkSession, session_catalog ┃ ┃ Table field ┃ Dataframe field ┃ 
┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ ✅ │ 1: foo: optional boolean │ 1: foo: optional boolean │ -| ✅ │ 2: bar: optional string │ 2: bar: optional string │ +│ ✅ │ 2: bar: optional string │ 2: bar: optional string │ │ ❌ │ 3: baz: optional int │ 3: baz: optional string │ │ ✅ │ 4: qux: optional date │ 4: qux: optional date │ └────┴──────────────────────────┴──────────────────────────┘ @@ -589,18 +591,7 @@ def test_add_files_with_timestamp_tz_ns_fails(session_catalog: Catalog, format_v mocker.patch.dict(os.environ, values={"PYICEBERG_DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE": "True"}) identifier = f"default.timestamptz_ns_added{format_version}" - - try: - session_catalog.drop_table(identifier=identifier) - except NoSuchTableError: - pass - - tbl = session_catalog.create_table( - identifier=identifier, - schema=nanoseconds_schema_iceberg, - properties={"format-version": str(format_version)}, - partition_spec=PartitionSpec(), - ) + tbl = _create_table(session_catalog, identifier, format_version, schema=nanoseconds_schema_iceberg) file_path = f"s3://warehouse/default/test_timestamp_tz/v{format_version}/test.parquet" # write parquet files @@ -617,3 +608,127 @@ def test_add_files_with_timestamp_tz_ns_fails(session_catalog: Catalog, format_v ), ): tbl.add_files(file_paths=[file_path]) + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_add_file_with_valid_nullability_diff(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None: + identifier = f"default.test_table_with_valid_nullability_diff{format_version}" + table_schema = Schema( + NestedField(field_id=1, name="long", field_type=LongType(), required=False), + ) + other_schema = pa.schema(( + pa.field("long", pa.int64(), nullable=False), # can support writing required pyarrow field to optional Iceberg field + )) + arrow_table = pa.Table.from_pydict( + { + "long": [1, 9], + }, + schema=other_schema, + ) + tbl = _create_table(session_catalog, identifier, format_version, schema=table_schema) + + file_path = f"s3://warehouse/default/test_add_file_with_valid_nullability_diff/v{format_version}/test.parquet" + # write parquet files + fo = tbl.io.new_output(file_path) + with fo.create(overwrite=True) as fos: + with pq.ParquetWriter(fos, schema=other_schema) as writer: + writer.write_table(arrow_table) + + tbl.add_files(file_paths=[file_path]) + # table's long field should cast to be optional on read + written_arrow_table = tbl.scan().to_arrow() + assert written_arrow_table == arrow_table.cast(pa.schema((pa.field("long", pa.int64(), nullable=True),))) + lhs = spark.table(f"{identifier}").toPandas() + rhs = written_arrow_table.to_pandas() + + for column in written_arrow_table.column_names: + for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): + assert left == right + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_add_files_with_valid_upcast( + spark: SparkSession, + session_catalog: Catalog, + format_version: int, + table_schema_with_promoted_types: Schema, + pyarrow_schema_with_promoted_types: pa.Schema, + pyarrow_table_with_promoted_types: pa.Table, +) -> None: + identifier = f"default.test_table_with_valid_upcast{format_version}" + tbl = _create_table(session_catalog, identifier, format_version, schema=table_schema_with_promoted_types) + + file_path = f"s3://warehouse/default/test_add_files_with_valid_upcast/v{format_version}/test.parquet" + # write parquet files + fo = tbl.io.new_output(file_path) + with 
fo.create(overwrite=True) as fos: + with pq.ParquetWriter(fos, schema=pyarrow_schema_with_promoted_types) as writer: + writer.write_table(pyarrow_table_with_promoted_types) + + tbl.add_files(file_paths=[file_path]) + # table's long field should cast to long on read + written_arrow_table = tbl.scan().to_arrow() + assert written_arrow_table == pyarrow_table_with_promoted_types.cast( + pa.schema(( + pa.field("long", pa.int64(), nullable=True), + pa.field("list", pa.large_list(pa.int64()), nullable=False), + pa.field("map", pa.map_(pa.large_string(), pa.int64()), nullable=False), + pa.field("double", pa.float64(), nullable=True), + pa.field("uuid", pa.binary(length=16), nullable=True), # UUID is read as a fixed-length binary of length 16 + )) + ) + lhs = spark.table(f"{identifier}").toPandas() + rhs = written_arrow_table.to_pandas() + + for column in written_arrow_table.column_names: + for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): + if column == "map": + # Arrow returns a list of tuples, instead of a dict + right = dict(right) + if column == "list": + # Arrow returns an array, convert to list for equality check + left, right = list(left), list(right) + if column == "uuid": + # Spark Iceberg represents UUID as hex string like '715a78ef-4e53-4089-9bf9-3ad0ee9bf545' + # whereas PyIceberg represents UUID as bytes on read + left, right = left.replace("-", ""), right.hex() + assert left == right + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_add_files_subset_of_schema(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None: + identifier = f"default.test_table_subset_of_schema{format_version}" + tbl = _create_table(session_catalog, identifier, format_version) + + file_path = f"s3://warehouse/default/test_add_files_subset_of_schema/v{format_version}/test.parquet" + arrow_table_without_some_columns = ARROW_TABLE.combine_chunks().drop(ARROW_TABLE.column_names[0]) + + # write parquet files + fo = tbl.io.new_output(file_path) + with fo.create(overwrite=True) as fos: + with pq.ParquetWriter(fos, schema=arrow_table_without_some_columns.schema) as writer: + writer.write_table(arrow_table_without_some_columns) + + tbl.add_files(file_paths=[file_path]) + written_arrow_table = tbl.scan().to_arrow() + assert tbl.scan().to_arrow() == pa.Table.from_pylist( + [ + { + "foo": None, # Missing column is read as None on read + "bar": "bar_string", + "baz": 123, + "qux": date(2024, 3, 7), + } + ], + schema=_pyarrow_schema_ensure_large_types(ARROW_SCHEMA), + ) + + lhs = spark.table(f"{identifier}").toPandas() + rhs = written_arrow_table.to_pandas() + + for column in written_arrow_table.column_names: + for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): + assert left == right diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index 41bc6fb5bf..09fe654d29 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -43,7 +43,7 @@ from pyiceberg.schema import Schema from pyiceberg.table import TableProperties from pyiceberg.transforms import IdentityTransform -from pyiceberg.types import IntegerType, NestedField +from pyiceberg.types import IntegerType, LongType, NestedField from utils import _create_table @@ -964,9 +964,10 @@ def test_sanitize_character_partitioned(catalog: Catalog) -> None: assert len(tbl.scan().to_arrow()) == 22 +@pytest.mark.integration @pytest.mark.parametrize("format_version", [1, 2]) -def 
table_write_subset_of_schema(session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int) -> None: - identifier = "default.table_append_subset_of_schema" +def test_table_write_subset_of_schema(session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int) -> None: + identifier = "default.test_table_write_subset_of_schema" tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, [arrow_table_with_null]) arrow_table_without_some_columns = arrow_table_with_null.combine_chunks().drop(arrow_table_with_null.column_names[0]) assert len(arrow_table_without_some_columns.columns) < len(arrow_table_with_null.columns) @@ -976,6 +977,101 @@ def table_write_subset_of_schema(session_catalog: Catalog, arrow_table_with_null assert len(tbl.scan().to_arrow()) == len(arrow_table_without_some_columns) * 2 +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_table_write_out_of_order_schema(session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int) -> None: + identifier = "default.test_table_write_out_of_order_schema" + # rotate the schema fields by 1 + fields = list(arrow_table_with_null.schema) + rotated_fields = fields[1:] + fields[:1] + rotated_schema = pa.schema(rotated_fields) + assert arrow_table_with_null.schema != rotated_schema + tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=rotated_schema) + + tbl.overwrite(arrow_table_with_null) + tbl.append(arrow_table_with_null) + # overwrite and then append should produce twice the data + assert len(tbl.scan().to_arrow()) == len(arrow_table_with_null) * 2 + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_table_write_schema_with_valid_nullability_diff( + spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int +) -> None: + identifier = "default.test_table_write_with_valid_nullability_diff" + table_schema = Schema( + NestedField(field_id=1, name="long", field_type=LongType(), required=False), + ) + other_schema = pa.schema(( + pa.field("long", pa.int64(), nullable=False), # can support writing required pyarrow field to optional Iceberg field + )) + arrow_table = pa.Table.from_pydict( + { + "long": [1, 9], + }, + schema=other_schema, + ) + tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, [arrow_table], schema=table_schema) + # table's long field should cast to be optional on read + written_arrow_table = tbl.scan().to_arrow() + assert written_arrow_table == arrow_table.cast(pa.schema((pa.field("long", pa.int64(), nullable=True),))) + lhs = spark.table(f"{identifier}").toPandas() + rhs = written_arrow_table.to_pandas() + + for column in written_arrow_table.column_names: + for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): + assert left == right + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_table_write_schema_with_valid_upcast( + spark: SparkSession, + session_catalog: Catalog, + format_version: int, + table_schema_with_promoted_types: Schema, + pyarrow_schema_with_promoted_types: pa.Schema, + pyarrow_table_with_promoted_types: pa.Table, +) -> None: + identifier = "default.test_table_write_with_valid_upcast" + + tbl = _create_table( + session_catalog, + identifier, + {"format-version": format_version}, + [pyarrow_table_with_promoted_types], + schema=table_schema_with_promoted_types, + ) + # table's long field should cast to 
long on read + written_arrow_table = tbl.scan().to_arrow() + assert written_arrow_table == pyarrow_table_with_promoted_types.cast( + pa.schema(( + pa.field("long", pa.int64(), nullable=True), + pa.field("list", pa.large_list(pa.int64()), nullable=False), + pa.field("map", pa.map_(pa.large_string(), pa.int64()), nullable=False), + pa.field("double", pa.float64(), nullable=True), # can support upcasting float to double + pa.field("uuid", pa.binary(length=16), nullable=True), # UUID is read as a fixed-length binary of length 16 + )) + ) + lhs = spark.table(f"{identifier}").toPandas() + rhs = written_arrow_table.to_pandas() + + for column in written_arrow_table.column_names: + for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): + if column == "map": + # Arrow returns a list of tuples, instead of a dict + right = dict(right) + if column == "list": + # Arrow returns an array, convert to list for equality check + left, right = list(left), list(right) + if column == "uuid": + # Spark Iceberg represents UUID as hex string like '715a78ef-4e53-4089-9bf9-3ad0ee9bf545' + # whereas PyIceberg represents UUID as bytes on read + left, right = left.replace("-", ""), right.hex() + assert left == right + + @pytest.mark.integration @pytest.mark.parametrize("format_version", [1, 2]) def test_write_all_timestamp_precision( diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py index 69bfcbab2b..a8f4ac980f 100644 --- a/tests/io/test_pyarrow.py +++ b/tests/io/test_pyarrow.py @@ -61,7 +61,7 @@ PyArrowFile, PyArrowFileIO, StatsAggregator, - _check_schema_compatible, + _check_pyarrow_schema_compatible, _ConvertToArrowSchema, _determine_partitions, _primitive_to_physical, @@ -1795,7 +1795,7 @@ def test_schema_mismatch_type(table_schema_simple: Schema) -> None: """ with pytest.raises(ValueError, match=expected): - _check_schema_compatible(table_schema_simple, other_schema) + _check_pyarrow_schema_compatible(table_schema_simple, other_schema) def test_schema_mismatch_nullability(table_schema_simple: Schema) -> None: @@ -1816,7 +1816,20 @@ def test_schema_mismatch_nullability(table_schema_simple: Schema) -> None: """ with pytest.raises(ValueError, match=expected): - _check_schema_compatible(table_schema_simple, other_schema) + _check_pyarrow_schema_compatible(table_schema_simple, other_schema) + + +def test_schema_compatible_nullability_diff(table_schema_simple: Schema) -> None: + other_schema = pa.schema(( + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=False), + )) + + try: + _check_pyarrow_schema_compatible(table_schema_simple, other_schema) + except Exception: + pytest.fail("Unexpected Exception raised when calling `_check_pyarrow_schema_compatible`") def test_schema_mismatch_missing_field(table_schema_simple: Schema) -> None: @@ -1836,21 +1849,114 @@ def test_schema_mismatch_missing_field(table_schema_simple: Schema) -> None: """ with pytest.raises(ValueError, match=expected): - _check_schema_compatible(table_schema_simple, other_schema) + _check_pyarrow_schema_compatible(table_schema_simple, other_schema) + + +def test_schema_compatible_missing_nullable_field_nested(table_schema_nested: Schema) -> None: + schema = table_schema_nested.as_arrow() + schema = schema.remove(6).insert( + 6, + pa.field( + "person", + pa.struct([ + pa.field("age", pa.int32(), nullable=False), + ]), + nullable=True, + ), + ) + try: + _check_pyarrow_schema_compatible(table_schema_nested, schema) + except Exception: + 
pytest.fail("Unexpected Exception raised when calling `_check_pyarrow_schema_compatible`") + + +def test_schema_mismatch_missing_required_field_nested(table_schema_nested: Schema) -> None: + other_schema = table_schema_nested.as_arrow() + other_schema = other_schema.remove(6).insert( + 6, + pa.field( + "person", + pa.struct([ + pa.field("name", pa.string(), nullable=True), + ]), + nullable=True, + ), + ) + expected = """Mismatch in fields: +┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ ┃ Table field ┃ Dataframe field ┃ +┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ ✅ │ 1: foo: optional string │ 1: foo: optional string │ +│ ✅ │ 2: bar: required int │ 2: bar: required int │ +│ ✅ │ 3: baz: optional boolean │ 3: baz: optional boolean │ +│ ✅ │ 4: qux: required list │ 4: qux: required list │ +│ ✅ │ 5: element: required string │ 5: element: required string │ +│ ✅ │ 6: quux: required map> │ map> │ +│ ✅ │ 7: key: required string │ 7: key: required string │ +│ ✅ │ 8: value: required map │ int> │ +│ ✅ │ 9: key: required string │ 9: key: required string │ +│ ✅ │ 10: value: required int │ 10: value: required int │ +│ ✅ │ 11: location: required │ 11: location: required │ +│ │ list> │ float>> │ +│ ✅ │ 12: element: required struct<13: │ 12: element: required struct<13: │ +│ │ latitude: optional float, 14: │ latitude: optional float, 14: │ +│ │ longitude: optional float> │ longitude: optional float> │ +│ ✅ │ 13: latitude: optional float │ 13: latitude: optional float │ +│ ✅ │ 14: longitude: optional float │ 14: longitude: optional float │ +│ ✅ │ 15: person: optional struct<16: │ 15: person: optional struct<16: │ +│ │ name: optional string, 17: age: │ name: optional string> │ +│ │ required int> │ │ +│ ✅ │ 16: name: optional string │ 16: name: optional string │ +│ ❌ │ 17: age: required int │ Missing │ +└────┴────────────────────────────────────┴────────────────────────────────────┘ +""" + + with pytest.raises(ValueError, match=expected): + _check_pyarrow_schema_compatible(table_schema_nested, other_schema) + + +def test_schema_compatible_nested(table_schema_nested: Schema) -> None: + try: + _check_pyarrow_schema_compatible(table_schema_nested, table_schema_nested.as_arrow()) + except Exception: + pytest.fail("Unexpected Exception raised when calling `_check_pyarrow_schema_compatible`") def test_schema_mismatch_additional_field(table_schema_simple: Schema) -> None: other_schema = pa.schema(( pa.field("foo", pa.string(), nullable=True), - pa.field("bar", pa.int32(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), pa.field("baz", pa.bool_(), nullable=True), pa.field("new_field", pa.date32(), nullable=True), )) - expected = r"PyArrow table contains more columns: new_field. Update the schema first \(hint, use union_by_name\)." + with pytest.raises( + ValueError, match=r"PyArrow table contains more columns: new_field. Update the schema first \(hint, use union_by_name\)." 
+ ): + _check_pyarrow_schema_compatible(table_schema_simple, other_schema) - with pytest.raises(ValueError, match=expected): - _check_schema_compatible(table_schema_simple, other_schema) + +def test_schema_compatible(table_schema_simple: Schema) -> None: + try: + _check_pyarrow_schema_compatible(table_schema_simple, table_schema_simple.as_arrow()) + except Exception: + pytest.fail("Unexpected Exception raised when calling `_check_pyarrow_schema_compatible`") + + +def test_schema_projection(table_schema_simple: Schema) -> None: + # remove optional `baz` field from `table_schema_simple` + other_schema = pa.schema(( + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + )) + try: + _check_pyarrow_schema_compatible(table_schema_simple, other_schema) + except Exception: + pytest.fail("Unexpected Exception raised when calling `_check_pyarrow_schema_compatible`") def test_schema_downcast(table_schema_simple: Schema) -> None: @@ -1862,9 +1968,9 @@ def test_schema_downcast(table_schema_simple: Schema) -> None: )) try: - _check_schema_compatible(table_schema_simple, other_schema) + _check_pyarrow_schema_compatible(table_schema_simple, other_schema) except Exception: - pytest.fail("Unexpected Exception raised when calling `_check_schema`") + pytest.fail("Unexpected Exception raised when calling `_check_pyarrow_schema_compatible`") def test_partition_for_demo() -> None: diff --git a/tests/table/test_name_mapping.py b/tests/table/test_name_mapping.py index d4a2bf6c41..3c50a24e5e 100644 --- a/tests/table/test_name_mapping.py +++ b/tests/table/test_name_mapping.py @@ -91,6 +91,23 @@ def test_json_mapped_field_deserialization() -> None: assert MappedField(field_id=1, names=["id", "record_id"]) == MappedField.model_validate_json(mapped_field_with_null_fields) +def test_json_mapped_field_no_names_deserialization() -> None: + mapped_field = """{ + "field-id": 1, + "names": [] + } + """ + assert MappedField(field_id=1, names=[]) == MappedField.model_validate_json(mapped_field) + + mapped_field_with_null_fields = """{ + "field-id": 1, + "names": [], + "fields": null + } + """ + assert MappedField(field_id=1, names=[]) == MappedField.model_validate_json(mapped_field_with_null_fields) + + def test_json_name_mapping_deserialization() -> None: name_mapping = """ [ @@ -247,11 +264,6 @@ def test_mapping_lookup_by_name(table_name_mapping_nested: NameMapping) -> None: table_name_mapping_nested.find("boom") -def test_invalid_mapped_field() -> None: - with pytest.raises(ValueError): - MappedField(field_id=1, names=[]) - - def test_update_mapping_no_updates_or_adds(table_name_mapping_nested: NameMapping) -> None: assert update_mapping(table_name_mapping_nested, {}, {}) == table_name_mapping_nested