Skip to content

Allow union of {int,long}, {float,double}, etc #1283

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion pyiceberg/table/update/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,13 @@ def _update_column(self, field: NestedField, existing_field: NestedField) -> Non
self.update_schema.make_column_optional(full_name)

if field.field_type.is_primitive and field.field_type != existing_field.field_type:
self.update_schema.update_column(full_name, field_type=field.field_type)
try:
# If the current type is wider than the new type, then
# we perform a noop
_ = promote(field.field_type, existing_field.field_type)
except ResolveError:
# If this is not the case, perform the type evolution
self.update_schema.update_column(full_name, field_type=field.field_type)

if field.doc is not None and field.doc != existing_field.doc:
self.update_schema.update_column(full_name, doc=field.doc)
Expand Down
39 changes: 36 additions & 3 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1189,6 +1189,17 @@ def test_detect_invalid_top_level_maps() -> None:
_ = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply() # type: ignore


def test_allow_double_to_float() -> None:
    """Union a float column into an existing double column and expect no change.

    The existing column is already a double, so the result of the union must
    be structurally equal to the existing schema and keep the double type.
    """
    existing = Schema(NestedField(field_id=1, name="aCol", field_type=DoubleType(), required=False))
    incoming = Schema(NestedField(field_id=1, name="aCol", field_type=FloatType(), required=False))

    # No transaction is needed because only the in-memory result of _apply() is inspected.
    updater = UpdateSchema(transaction=None, schema=existing)  # type: ignore
    result = updater.union_by_name(incoming)._apply()

    assert result.as_struct() == existing.as_struct()
    assert len(result.fields) == 1
    sole_field = result.fields[0]
    assert isinstance(sole_field.field_type, DoubleType)


def test_promote_float_to_double() -> None:
current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=FloatType(), required=False))
new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=DoubleType(), required=False))
Expand All @@ -1200,11 +1211,33 @@ def test_promote_float_to_double() -> None:
assert isinstance(applied.fields[0].field_type, DoubleType)


def test_detect_invalid_promotion_double_to_float() -> None:
current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=DoubleType(), required=False))
def test_allow_long_to_int() -> None:
    """Union an int column into an existing long column and expect no change.

    The existing column is already a long, so the result of the union must
    be structurally equal to the existing schema and keep the long type.
    """
    existing = Schema(NestedField(field_id=1, name="aCol", field_type=LongType(), required=False))
    incoming = Schema(NestedField(field_id=1, name="aCol", field_type=IntegerType(), required=False))

    # No transaction is needed because only the in-memory result of _apply() is inspected.
    updater = UpdateSchema(transaction=None, schema=existing)  # type: ignore
    result = updater.union_by_name(incoming)._apply()

    assert result.as_struct() == existing.as_struct()
    assert len(result.fields) == 1
    sole_field = result.fields[0]
    assert isinstance(sole_field.field_type, LongType)


def test_promote_int_to_long() -> None:
    """Union a long column into an existing int column and expect promotion.

    The result of the union must be structurally equal to the incoming
    schema, i.e. the single column ends up as a long.
    """
    existing = Schema(NestedField(field_id=1, name="aCol", field_type=IntegerType(), required=False))
    incoming = Schema(NestedField(field_id=1, name="aCol", field_type=LongType(), required=False))

    # No transaction is needed because only the in-memory result of _apply() is inspected.
    updater = UpdateSchema(transaction=None, schema=existing)  # type: ignore
    result = updater.union_by_name(incoming)._apply()

    assert result.as_struct() == incoming.as_struct()
    assert len(result.fields) == 1
    sole_field = result.fields[0]
    assert isinstance(sole_field.field_type, LongType)


def test_detect_invalid_promotion_string_to_float() -> None:
    """Union a float column into an existing string column and expect rejection.

    The schemas built here go from string to float, so the validation error
    must name exactly that pair of types.

    Raises (expected, captured by pytest.raises):
        ValidationError: "Cannot change column type: aCol: string -> float".
    """
    current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=StringType(), required=False))
    new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=FloatType(), required=False))

    # Only one expectation is kept. A leftover outer "double -> float" matcher
    # would never see the exception, because the inner context manager consumes
    # it, and it does not describe the string -> float schemas used here.
    with pytest.raises(ValidationError, match="Cannot change column type: aCol: string -> float"):
        _ = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore


Expand Down