|
19 | 19 | from copy import copy
|
20 | 20 | from typing import Dict
|
21 | 21 |
|
| 22 | +import pyarrow as pa |
22 | 23 | import pytest
|
23 | 24 | from sortedcontainers import SortedList
|
24 | 25 |
|
|
58 | 59 | Table,
|
59 | 60 | UpdateSchema,
|
60 | 61 | _apply_table_update,
|
| 62 | + _check_schema, |
61 | 63 | _generate_snapshot_id,
|
62 | 64 | _match_deletes_to_data_file,
|
63 | 65 | _TableMetadataUpdateContext,
|
@@ -982,3 +984,79 @@ def test_correct_schema() -> None:
|
982 | 984 | _ = t.scan(snapshot_id=-1).projection()
|
983 | 985 |
|
984 | 986 | assert "Snapshot not found: -1" in str(exc_info.value)
|
| 987 | + |
| 988 | + |
def test_schema_mismatch_type(table_schema_simple: Schema) -> None:
    """Check that a column with a different type is reported as a field mismatch.

    The Arrow schema declares ``bar`` as ``decimal128(18, 6)``; the expected
    error message shows the table side of that row as ``required int``.
    """
    arrow_fields = [
        pa.field("foo", pa.string(), nullable=True),
        pa.field("bar", pa.decimal128(18, 6), nullable=False),
        pa.field("baz", pa.bool_(), nullable=True),
    ]
    arrow_schema = pa.schema(arrow_fields)

    # Raw string: the parentheses of ``decimal(18, 6)`` are escaped because the
    # text is used as a regular expression.
    mismatch_pattern = r"""Mismatch in fields:
┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ Table field ┃ Dataframe field ┃
┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ ✅ │ 1: foo: optional string │ 1: foo: optional string │
│ ❌ │ 2: bar: required int │ 2: bar: required decimal\(18, 6\) │
│ ✅ │ 3: baz: optional boolean │ 3: baz: optional boolean │
└────┴──────────────────────────┴─────────────────────────────────┘
"""

    with pytest.raises(ValueError) as exc_info:
        _check_schema(table_schema_simple, arrow_schema)

    # Same regex search against str(exception) that ``match=`` would perform.
    exc_info.match(mismatch_pattern)
| 1008 | + |
| 1009 | + |
def test_schema_mismatch_nullability(table_schema_simple: Schema) -> None:
    """Check that a column differing only in nullability is reported as a mismatch.

    The Arrow schema declares ``bar`` as a nullable ``int32``; the expected
    error message shows ``required int`` on the table side and ``optional int``
    on the dataframe side.
    """
    arrow_fields = [
        pa.field("foo", pa.string(), nullable=True),
        pa.field("bar", pa.int32(), nullable=True),
        pa.field("baz", pa.bool_(), nullable=True),
    ]
    arrow_schema = pa.schema(arrow_fields)

    mismatch_pattern = """Mismatch in fields:
┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ Table field ┃ Dataframe field ┃
┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ ✅ │ 1: foo: optional string │ 1: foo: optional string │
│ ❌ │ 2: bar: required int │ 2: bar: optional int │
│ ✅ │ 3: baz: optional boolean │ 3: baz: optional boolean │
└────┴──────────────────────────┴──────────────────────────┘
"""

    with pytest.raises(ValueError) as exc_info:
        _check_schema(table_schema_simple, arrow_schema)

    # Same regex search against str(exception) that ``match=`` would perform.
    exc_info.match(mismatch_pattern)
| 1029 | + |
| 1030 | + |
def test_schema_mismatch_missing_field(table_schema_simple: Schema) -> None:
    """Check that a column absent from the Arrow schema is reported as missing.

    The Arrow schema omits ``bar`` entirely; the expected error message shows
    ``Missing`` in the dataframe column of that row.
    """
    arrow_fields = [
        pa.field("foo", pa.string(), nullable=True),
        pa.field("baz", pa.bool_(), nullable=True),
    ]
    arrow_schema = pa.schema(arrow_fields)

    mismatch_pattern = """Mismatch in fields:
┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ Table field ┃ Dataframe field ┃
┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ ✅ │ 1: foo: optional string │ 1: foo: optional string │
│ ❌ │ 2: bar: required int │ Missing │
│ ✅ │ 3: baz: optional boolean │ 3: baz: optional boolean │
└────┴──────────────────────────┴──────────────────────────┘
"""

    with pytest.raises(ValueError) as exc_info:
        _check_schema(table_schema_simple, arrow_schema)

    # Same regex search against str(exception) that ``match=`` would perform.
    exc_info.match(mismatch_pattern)
| 1049 | + |
| 1050 | + |
def test_schema_mismatch_additional_field(table_schema_simple: Schema) -> None:
    """Check that an extra Arrow column is rejected with a schema-update hint.

    The Arrow schema carries a ``new_field`` column; the expected error names
    that column and suggests ``union_by_name``.
    """
    # NOTE(review): ``bar`` is nullable here, while the sibling tests' expected
    # output shows the table field as required. Presumably the extra-column
    # error is raised before any per-field comparison -- confirm.
    arrow_fields = [
        pa.field("foo", pa.string(), nullable=True),
        pa.field("bar", pa.int32(), nullable=True),
        pa.field("baz", pa.bool_(), nullable=True),
        pa.field("new_field", pa.date32(), nullable=True),
    ]
    arrow_schema = pa.schema(arrow_fields)

    # Used as a regex: the parentheses are escaped, the dots are not (they
    # match any character, including a literal dot).
    more_columns_pattern = r"PyArrow table contains more columns: new_field. Update the schema first \(hint, use union_by_name\)."

    with pytest.raises(ValueError) as exc_info:
        _check_schema(table_schema_simple, arrow_schema)

    # Same regex search against str(exception) that ``match=`` would perform.
    exc_info.match(more_columns_pattern)
0 commit comments