-
Notifications
You must be signed in to change notification settings - Fork 281
feat: validate snapshot write compatibility #1772
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
kaushiksrini
wants to merge
9
commits into
apache:main
Choose a base branch
from
kaushiksrini:check-write-compatibility-append
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+119
−1
Open
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
949e140
feat: validate snapshot write compatibility
kaushiksrini e631ddf
reuse ancestors_of existing functionality
kaushiksrini 740db96
Update pyiceberg/table/update/snapshot.py
kaushiksrini 611b017
fix mypy errors
kaushiksrini 0923dc4
add tests for verifying snapshot compatibility
kaushiksrini 57e0f90
update parent snapshot when there are conflicts and change exception
kaushiksrini 5122039
Merge branch 'main' into check-write-compatibility-append
Fokko 66849dd
add table content verification for tests
kaushiksrini 0824c35
modify allowed operations for replace
kaushiksrini File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,3 +50,5 @@ htmlcov | |
pyiceberg/avro/decoder_fast.c | ||
pyiceberg/avro/*.html | ||
pyiceberg/avro/*.so | ||
|
||
.ks/ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,7 +31,7 @@ | |
from pytest_mock.plugin import MockerFixture | ||
|
||
from pyiceberg.catalog import Catalog | ||
from pyiceberg.exceptions import NoSuchTableError | ||
from pyiceberg.exceptions import CommitFailedException, NoSuchTableError | ||
from pyiceberg.io import FileIO | ||
from pyiceberg.io.pyarrow import UnsupportedPyArrowTypeException, schema_to_pyarrow | ||
from pyiceberg.manifest import DataFile | ||
|
@@ -901,6 +901,81 @@ def test_add_files_that_referenced_by_current_snapshot_with_check_duplicate_file | |
assert f"Cannot add files that are already referenced by table, files: {existing_files_in_table}" in str(exc_info.value) | ||
|
||
|
||
@pytest.mark.integration | ||
@pytest.mark.parametrize("format_version", [1, 2]) | ||
def test_conflict_delete_delete( | ||
spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int | ||
) -> None: | ||
identifier = "default.test_conflict" | ||
tbl1 = _create_table(session_catalog, identifier, format_version, schema=arrow_table_with_null.schema) | ||
tbl1.append(arrow_table_with_null) | ||
tbl2 = session_catalog.load_table(identifier) | ||
|
||
tbl1.delete("string == 'z'") | ||
|
||
with pytest.raises( | ||
CommitFailedException, match="Operation .* is not allowed when performing .*. Check for overlaps or conflicts." | ||
): | ||
# tbl2 isn't aware of the commit by tbl1 | ||
tbl2.delete("string == 'z'") | ||
|
||
|
||
@pytest.mark.integration | ||
@pytest.mark.parametrize("format_version", [1, 2]) | ||
def test_conflict_delete_append( | ||
spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int | ||
) -> None: | ||
identifier = "default.test_conflict" | ||
tbl1 = _create_table(session_catalog, identifier, format_version, schema=arrow_table_with_null.schema) | ||
tbl1.append(arrow_table_with_null) | ||
tbl2 = session_catalog.load_table(identifier) | ||
|
||
# This is allowed | ||
tbl1.delete("string == 'z'") | ||
tbl2.append(arrow_table_with_null) | ||
|
||
# verify against expected table | ||
arrow_table_expected = arrow_table_with_null[:2] | ||
assert tbl1.scan().to_arrow() == arrow_table_expected | ||
|
||
|
||
@pytest.mark.integration | ||
@pytest.mark.parametrize("format_version", [1, 2]) | ||
def test_conflict_append_delete( | ||
spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int | ||
) -> None: | ||
identifier = "default.test_conflict" | ||
tbl1 = _create_table(session_catalog, identifier, format_version, schema=arrow_table_with_null.schema) | ||
tbl1.append(arrow_table_with_null) | ||
tbl2 = session_catalog.load_table(identifier) | ||
|
||
tbl1.append(arrow_table_with_null) | ||
|
||
with pytest.raises( | ||
CommitFailedException, match="Operation .* is not allowed when performing .*. Check for overlaps or conflicts." | ||
): | ||
# tbl2 isn't aware of the commit by tbl1 | ||
tbl2.delete("string == 'z'") | ||
|
||
|
||
@pytest.mark.integration | ||
@pytest.mark.parametrize("format_version", [2]) | ||
def test_conflict_append_append( | ||
spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int | ||
) -> None: | ||
identifier = "default.test_conflict" | ||
tbl1 = _create_table(session_catalog, identifier, format_version, schema=arrow_table_with_null.schema) | ||
tbl1.append(arrow_table_with_null) | ||
tbl2 = session_catalog.load_table(identifier) | ||
|
||
tbl1.append(arrow_table_with_null) | ||
tbl2.append(arrow_table_with_null) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we introduce an assertion here to verify that the content of the table is as we'd expect (with 3*arrow_table_with_null data)? |
||
|
||
# verify against expected table | ||
arrow_table_expected = pa.concat_tables([arrow_table_with_null, arrow_table_with_null, arrow_table_with_null]) | ||
assert tbl1.scan().to_arrow() == arrow_table_expected | ||
|
||
|
||
@pytest.mark.integration | ||
def test_add_files_hour_transform(session_catalog: Catalog) -> None: | ||
identifier = "default.test_add_files_hour_transform" | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should verify the content of the table here.