Skip to content

Commit effb8cb

Browse files
authored
Hive: update hive storage descriptor after commit schema change (#2036)
<!-- Thanks for opening a pull request! --> <!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. --> <!-- Closes #${GITHUB_ISSUE_ID} --> # Rationale for this change Like the Iceberg Java implementation, PyIceberg should also update the Hive storage descriptor when it commits new table metadata, so that schema changes are reflected in the Hive Metastore. See: https://github.com/apache/iceberg/blob/b504f9c51c6c0e0a5c0c5ff53f295e69b67d8e59/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java#L170 # Are these changes tested? Yes, new tests were added. # Are there any user-facing changes? No <!-- In the case of user-facing changes, please add the changelog label. -->
1 parent 33c8931 commit effb8cb

File tree

3 files changed

+35
-0
lines changed

3 files changed

+35
-0
lines changed

dev/hive/core-site.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,9 @@
5050
<name>fs.s3a.path.style.access</name>
5151
<value>true</value>
5252
</property>
53+
<property>
54+
<name>hive.metastore.disallow.incompatible.col.type.changes</name>
55+
<value>false</value>
56+
</property>
57+
5358
</configuration>

pyiceberg/catalog/hive.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,12 @@ def commit_table(
561561
previous_metadata_location=current_table.metadata_location,
562562
metadata_properties=updated_staged_table.properties,
563563
)
564+
# Update hive's schema and properties
565+
hive_table.sd = _construct_hive_storage_descriptor(
566+
updated_staged_table.schema(),
567+
updated_staged_table.location(),
568+
property_as_bool(updated_staged_table.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
569+
)
564570
open_client.alter_table_with_environment_context(
565571
dbname=database_name,
566572
tbl_name=table_name,

tests/integration/test_writes/test_writes.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,6 +1148,30 @@ def test_hive_catalog_storage_descriptor(
11481148
assert spark.sql("SELECT * FROM hive.default.test_storage_descriptor").count() == 3
11491149

11501150

1151+
@pytest.mark.integration
1152+
@pytest.mark.parametrize("format_version", [1, 2])
1153+
def test_hive_catalog_storage_descriptor_has_changed(
1154+
session_catalog_hive: HiveCatalog,
1155+
pa_schema: pa.Schema,
1156+
arrow_table_with_null: pa.Table,
1157+
spark: SparkSession,
1158+
format_version: int,
1159+
) -> None:
1160+
tbl = _create_table(
1161+
session_catalog_hive, "default.test_storage_descriptor", {"format-version": format_version}, [arrow_table_with_null]
1162+
)
1163+
1164+
with tbl.transaction() as tx:
1165+
with tx.update_schema() as schema:
1166+
schema.update_column("string_long", doc="this is string_long")
1167+
schema.update_column("binary", doc="this is binary")
1168+
1169+
with session_catalog_hive._client as open_client:
1170+
hive_table = session_catalog_hive._get_hive_table(open_client, "default", "test_storage_descriptor")
1171+
assert "this is string_long" in str(hive_table.sd)
1172+
assert "this is binary" in str(hive_table.sd)
1173+
1174+
11511175
@pytest.mark.integration
11521176
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
11531177
def test_sanitize_character_partitioned(catalog: Catalog) -> None:

0 commit comments

Comments
 (0)