Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

table_exists unit/integration test for NoSuchTableError #678

Merged
merged 5 commits into from
May 4, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pyiceberg/catalog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,11 @@ def table_exists(self, identifier: Union[str, Identifier]) -> bool:
Returns:
bool: True if the table exists, False otherwise.
"""
try:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we get rid of the other table_exists function in the same file?

def table_exists(self, identifier: Union[str, Identifier]) -> bool:
    """Check whether a table exists by attempting to load it.

    Args:
        identifier (str | Identifier): Table identifier.

    Returns:
        bool: True if the table exists, False otherwise.
    """
    try:
        self.load_table(identifier)
    except NoSuchTableError:
        # A failed load with NoSuchTableError is treated as "table does not exist".
        return False
    return True

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what do you think about changing the REST catalog's table_exists implementation to be similar to this?
https://github.com/apache/iceberg-python/pull/512/files#diff-3bda7391ebd8aa3dcfd6703d8d2764830b9d9c35fa854188a37d69611274bd3dR722-R727

Maybe it could call super().table_exists()?

Copy link
Contributor

@HonahX HonahX May 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is better for RestCatalog to keep a separate implementation that makes a HEAD request to /v1/{prefix}/namespaces/{namespace}/tables/{table}, ref: #512 (comment), #507 (comment)

The try/except implementation is meant for the non-REST catalogs, which is why it lives in MetastoreCatalog rather than in the Catalog interface.

A little more context on MetastoreCatalog: while working on #498, we found that many helper functions, as well as some implementations, apply to non-REST catalogs only. So we decided to move those into a separate layer, MetastoreCatalog, to make the inheritance hierarchy cleaner: #498 (comment).

Do these sound reasonable to you?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense, thanks for the detailed explanation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pushed the changes as per the discussion. Thank you all for the feedback!

self.load_table(identifier)
return True
except NoSuchTableError:
return False

@abstractmethod
def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table:
Expand Down
5 changes: 5 additions & 0 deletions tests/catalog/integration_test_dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,3 +262,8 @@ def test_update_namespace_properties(test_catalog: Catalog, database_name: str)
else:
assert k in update_report.removed
assert "updated test description" == test_catalog.load_namespace_properties(database_name)["comment"]

def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None:
    """Check table_exists on the DynamoDB catalog for an existing and a missing table."""
    identifier = (database_name, table_name)
    test_catalog.create_namespace(database_name)
    test_catalog.create_table(identifier, table_schema_nested)
    # Act and Assert for an existing table
    assert test_catalog.table_exists(identifier) is True
    # Act and Assert for a non-existing table: the NoSuchTableError raised while
    # loading must be turned into False rather than propagate to the caller.
    assert test_catalog.table_exists(("non", "exist")) is False
MehulBatra marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 6 additions & 0 deletions tests/catalog/integration_test_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,3 +558,9 @@ def test_create_table_transaction(
]
},
]


def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
    """Check table_exists on the Glue catalog for an existing and a missing table."""
    identifier = (database_name, table_name)
    test_catalog.create_namespace(database_name)
    test_catalog.create_table(identifier, table_schema_nested)
    # Act and Assert for an existing table
    assert test_catalog.table_exists(identifier) is True
    # Act and Assert for a non-existing table: the NoSuchTableError raised while
    # loading must be turned into False rather than propagate to the caller.
    assert test_catalog.table_exists(("non", "exist")) is False
14 changes: 14 additions & 0 deletions tests/catalog/test_dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,3 +562,17 @@ def test_passing_provided_profile() -> None:
assert test_catalog.dynamodb is mock_client
mock_session.assert_called_with(**session_props)
assert test_catalog.dynamodb is mock_session().client()


@mock_aws
def test_table_exists(
    _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str
) -> None:
    """table_exists is True for a table that was created and False for an unknown identifier."""
    catalog_properties = {"warehouse": f"s3://{BUCKET_NAME}", "s3.endpoint": moto_endpoint_url}
    catalog = DynamoDbCatalog("test_ddb_catalog", **catalog_properties)
    table_identifier = (database_name, table_name)
    catalog.create_namespace(namespace=database_name)
    catalog.create_table(table_identifier, table_schema_nested)

    # The table created above must be reported as present.
    assert catalog.table_exists(table_identifier) is True
    # An identifier that was never created must be reported as absent.
    assert catalog.table_exists(("non", "exist")) is False
15 changes: 15 additions & 0 deletions tests/catalog/test_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,3 +817,18 @@ def test_create_table_transaction(
assert table.spec().fields_by_source_id(2)[0].name == "bar"
assert table.spec().fields_by_source_id(2)[0].field_id == 1001
assert table.spec().fields_by_source_id(2)[0].transform == IdentityTransform()


@mock_aws
def test_table_exists(
    _bucket_initialize: None, moto_endpoint_url: str, table_schema_simple: Schema, database_name: str, table_name: str
) -> None:
    """table_exists is True for a table that was created and False for an unknown identifier."""
    catalog_properties = {"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"}
    catalog = GlueCatalog("glue", **catalog_properties)
    table_identifier = (database_name, table_name)
    catalog.create_namespace(namespace=database_name)
    catalog.create_table(identifier=table_identifier, schema=table_schema_simple)

    # The table created above must be reported as present.
    assert catalog.table_exists(table_identifier) is True
    # An identifier that was never created must be reported as absent.
    assert catalog.table_exists(("non", "exist")) is False
19 changes: 19 additions & 0 deletions tests/catalog/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,3 +982,22 @@ def test_table_properties_raise_for_none_value(
with pytest.raises(ValidationError) as exc_info:
_ = catalog.create_table(random_identifier, table_schema_simple, properties=property_with_none)
assert "None type is not a supported value in properties: property_name" in str(exc_info.value)


@pytest.mark.parametrize(
    "catalog",
    [
        lazy_fixture("catalog_memory"),
        lazy_fixture("catalog_sqlite"),
    ],
)
def test_table_exists(catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier) -> None:
    """table_exists is True for a table that was created and False for an unknown identifier."""
    # Only the namespace part is needed to create the namespace; the table name is unused here.
    namespace, _ = random_identifier
    catalog.create_namespace(namespace)
    catalog.create_table(random_identifier, table_schema_simple, properties={"format-version": "2"})

    # The table created above must be reported as present.
    assert catalog.table_exists(random_identifier) is True

    # An identifier that was never created must be reported as absent.
    assert catalog.table_exists(("non", "exist")) is False