Skip to content

Commit a57cb07

Browse files
authored
Support catalog in identifier to locate existing tables (#150)
1 parent c55fe5e commit a57cb07

File tree

13 files changed: +730 additions, -375 deletions

pyiceberg/catalog/__init__.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -536,6 +536,20 @@ def identifier_to_database_and_table(
536536

537537
return tuple_identifier[0], tuple_identifier[1]
538538

539+
def identifier_to_tuple_without_catalog(self, identifier: Union[str, Identifier]) -> Identifier:
540+
"""Convert an identifier to a tuple and drop this catalog's name from the first element.
541+
542+
Args:
543+
identifier (str | Identifier): Table identifier.
544+
545+
Returns:
546+
Identifier: a tuple of strings with this catalog's name removed
547+
"""
548+
identifier_tuple = Catalog.identifier_to_tuple(identifier)
549+
if len(identifier_tuple) >= 3 and identifier_tuple[0] == self.name:
550+
identifier_tuple = identifier_tuple[1:]
551+
return identifier_tuple
552+
539553
def purge_table(self, identifier: Union[str, Identifier]) -> None:
540554
"""Drop a table and purge all data and metadata files.
541555
@@ -547,8 +561,9 @@ def purge_table(self, identifier: Union[str, Identifier]) -> None:
547561
Raises:
548562
NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
549563
"""
550-
table = self.load_table(identifier)
551-
self.drop_table(identifier)
564+
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
565+
table = self.load_table(identifier_tuple)
566+
self.drop_table(identifier_tuple)
552567
io = load_file_io(self.properties, table.metadata_location)
553568
metadata = table.metadata
554569
manifest_lists_to_delete = set()

pyiceberg/catalog/dynamodb.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -213,7 +213,8 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table:
213213
Raises:
214214
NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
215215
"""
216-
database_name, table_name = self.identifier_to_database_and_table(identifier, NoSuchTableError)
216+
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
217+
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
217218
dynamo_table_item = self._get_iceberg_table_item(database_name=database_name, table_name=table_name)
218219
return self._convert_dynamo_table_item_to_iceberg_table(dynamo_table_item=dynamo_table_item)
219220

@@ -226,7 +227,8 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None:
226227
Raises:
227228
NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
228229
"""
229-
database_name, table_name = self.identifier_to_database_and_table(identifier, NoSuchTableError)
230+
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
231+
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
230232

231233
try:
232234
self._delete_dynamo_item(
@@ -256,7 +258,8 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
256258
NoSuchPropertyException: When from table miss some required properties.
257259
NoSuchNamespaceError: When the destination namespace doesn't exist.
258260
"""
259-
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier, NoSuchTableError)
261+
from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier)
262+
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier_tuple, NoSuchTableError)
260263
to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier)
261264

262265
from_table_item = self._get_iceberg_table_item(database_name=from_database_name, table_name=from_table_name)
@@ -287,7 +290,7 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
287290
raise TableAlreadyExistsError(f"Table {to_database_name}.{to_table_name} already exists") from e
288291

289292
try:
290-
self.drop_table(from_identifier)
293+
self.drop_table(from_identifier_tuple)
291294
except (NoSuchTableError, GenericDynamoDbError) as e:
292295
log_message = f"Failed to drop old table {from_database_name}.{from_table_name}. "
293296

pyiceberg/catalog/glue.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -265,7 +265,8 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table:
265265
Raises:
266266
NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
267267
"""
268-
database_name, table_name = self.identifier_to_database_and_table(identifier, NoSuchTableError)
268+
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
269+
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
269270
try:
270271
load_table_response = self.glue.get_table(DatabaseName=database_name, Name=table_name)
271272
except self.glue.exceptions.EntityNotFoundException as e:
@@ -282,7 +283,8 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None:
282283
Raises:
283284
NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
284285
"""
285-
database_name, table_name = self.identifier_to_database_and_table(identifier, NoSuchTableError)
286+
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
287+
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
286288
try:
287289
self.glue.delete_table(DatabaseName=database_name, Name=table_name)
288290
except self.glue.exceptions.EntityNotFoundException as e:
@@ -307,7 +309,8 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
307309
NoSuchPropertyException: When from table miss some required properties.
308310
NoSuchNamespaceError: When the destination namespace doesn't exist.
309311
"""
310-
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier, NoSuchTableError)
312+
from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier)
313+
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier_tuple, NoSuchTableError)
311314
to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier)
312315
try:
313316
get_table_response = self.glue.get_table(DatabaseName=from_database_name, Name=from_table_name)

pyiceberg/catalog/hive.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -347,7 +347,8 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table:
347347
Raises:
348348
NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
349349
"""
350-
database_name, table_name = self.identifier_to_database_and_table(identifier, NoSuchTableError)
350+
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
351+
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
351352
try:
352353
with self._client as open_client:
353354
hive_table = open_client.get_table(dbname=database_name, tbl_name=table_name)
@@ -366,7 +367,8 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None:
366367
Raises:
367368
NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
368369
"""
369-
database_name, table_name = self.identifier_to_database_and_table(identifier, NoSuchTableError)
370+
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
371+
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
370372
try:
371373
with self._client as open_client:
372374
open_client.drop_table(dbname=database_name, name=table_name, deleteData=False)
@@ -393,7 +395,8 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
393395
NoSuchTableError: When a table with the name does not exist.
394396
NoSuchNamespaceError: When the destination namespace doesn't exist.
395397
"""
396-
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier, NoSuchTableError)
398+
from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier)
399+
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier_tuple, NoSuchTableError)
397400
to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier)
398401
try:
399402
with self._client as open_client:

pyiceberg/catalog/rest.py

Lines changed: 18 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -302,19 +302,20 @@ def _fetch_config(self) -> None:
302302
# Update URI based on overrides
303303
self.uri = config[URI]
304304

305-
def _split_identifier_for_path(self, identifier: Union[str, Identifier, TableIdentifier]) -> Properties:
306-
if isinstance(identifier, TableIdentifier):
307-
return {"namespace": NAMESPACE_SEPARATOR.join(identifier.namespace.root[1:]), "table": identifier.name}
308-
305+
def _identifier_to_validated_tuple(self, identifier: Union[str, Identifier]) -> Identifier:
309306
identifier_tuple = self.identifier_to_tuple(identifier)
310307
if len(identifier_tuple) <= 1:
311308
raise NoSuchTableError(f"Missing namespace or invalid identifier: {'.'.join(identifier_tuple)}")
309+
return identifier_tuple
310+
311+
def _split_identifier_for_path(self, identifier: Union[str, Identifier, TableIdentifier]) -> Properties:
312+
if isinstance(identifier, TableIdentifier):
313+
return {"namespace": NAMESPACE_SEPARATOR.join(identifier.namespace.root[1:]), "table": identifier.name}
314+
identifier_tuple = self._identifier_to_validated_tuple(identifier)
312315
return {"namespace": NAMESPACE_SEPARATOR.join(identifier_tuple[:-1]), "table": identifier_tuple[-1]}
313316

314317
def _split_identifier_for_json(self, identifier: Union[str, Identifier]) -> Dict[str, Union[Identifier, str]]:
315-
identifier_tuple = self.identifier_to_tuple(identifier)
316-
if len(identifier_tuple) <= 1:
317-
raise NoSuchTableError(f"Missing namespace or invalid identifier: {identifier_tuple}")
318+
identifier_tuple = self._identifier_to_validated_tuple(identifier)
318319
return {"namespace": identifier_tuple[:-1], "name": identifier_tuple[-1]}
319320

320321
def _handle_non_200_response(self, exc: HTTPError, error_handler: Dict[int, Type[Exception]]) -> None:
@@ -499,12 +500,10 @@ def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]:
499500
return [(*table.namespace, table.name) for table in ListTablesResponse(**response.json()).identifiers]
500501

501502
def load_table(self, identifier: Union[str, Identifier]) -> Table:
502-
identifier_tuple = self.identifier_to_tuple(identifier)
503-
504-
if len(identifier_tuple) <= 1:
505-
raise NoSuchTableError(f"Missing namespace or invalid identifier: {identifier}")
506-
507-
response = self._session.get(self.url(Endpoints.load_table, prefixed=True, **self._split_identifier_for_path(identifier)))
503+
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
504+
response = self._session.get(
505+
self.url(Endpoints.load_table, prefixed=True, **self._split_identifier_for_path(identifier_tuple))
506+
)
508507
try:
509508
response.raise_for_status()
510509
except HTTPError as exc:
@@ -514,8 +513,11 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table:
514513
return self._response_to_table(identifier_tuple, table_response)
515514

516515
def drop_table(self, identifier: Union[str, Identifier], purge_requested: bool = False) -> None:
516+
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
517517
response = self._session.delete(
518-
self.url(Endpoints.drop_table, prefixed=True, purge=purge_requested, **self._split_identifier_for_path(identifier)),
518+
self.url(
519+
Endpoints.drop_table, prefixed=True, purge=purge_requested, **self._split_identifier_for_path(identifier_tuple)
520+
),
519521
)
520522
try:
521523
response.raise_for_status()
@@ -526,8 +528,9 @@ def purge_table(self, identifier: Union[str, Identifier]) -> None:
526528
self.drop_table(identifier=identifier, purge_requested=True)
527529

528530
def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table:
531+
from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier)
529532
payload = {
530-
"source": self._split_identifier_for_json(from_identifier),
533+
"source": self._split_identifier_for_json(from_identifier_tuple),
531534
"destination": self._split_identifier_for_json(to_identifier),
532535
}
533536
response = self._session.post(self.url(Endpoints.rename_table), json=payload)

pyiceberg/catalog/sql.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -231,7 +231,8 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table:
231231
Raises:
232232
NoSuchTableError: If a table with the name does not exist.
233233
"""
234-
database_name, table_name = self.identifier_to_database_and_table(identifier, NoSuchTableError)
234+
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
235+
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
235236
with Session(self.engine) as session:
236237
stmt = select(IcebergTables).where(
237238
IcebergTables.catalog_name == self.name,
@@ -252,7 +253,8 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None:
252253
Raises:
253254
NoSuchTableError: If a table with the name does not exist.
254255
"""
255-
database_name, table_name = self.identifier_to_database_and_table(identifier, NoSuchTableError)
256+
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
257+
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
256258
with Session(self.engine) as session:
257259
res = session.execute(
258260
delete(IcebergTables).where(
@@ -280,7 +282,8 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
280282
TableAlreadyExistsError: If a table with the new name already exist.
281283
NoSuchNamespaceError: If the target namespace does not exist.
282284
"""
283-
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier, NoSuchTableError)
285+
from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier)
286+
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier_tuple, NoSuchTableError)
284287
to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier)
285288
if not self._namespace_exists(to_database_name):
286289
raise NoSuchNamespaceError(f"Namespace does not exist: {to_database_name}")

tests/catalog/test_base.py

Lines changed: 50 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -149,14 +149,14 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
149149
)
150150

151151
def load_table(self, identifier: Union[str, Identifier]) -> Table:
152-
identifier = Catalog.identifier_to_tuple(identifier)
152+
identifier = self.identifier_to_tuple_without_catalog(identifier)
153153
try:
154154
return self.__tables[identifier]
155155
except KeyError as error:
156156
raise NoSuchTableError(f"Table does not exist: {identifier}") from error
157157

158158
def drop_table(self, identifier: Union[str, Identifier]) -> None:
159-
identifier = Catalog.identifier_to_tuple(identifier)
159+
identifier = self.identifier_to_tuple_without_catalog(identifier)
160160
try:
161161
self.__tables.pop(identifier)
162162
except KeyError as error:
@@ -166,7 +166,7 @@ def purge_table(self, identifier: Union[str, Identifier]) -> None:
166166
self.drop_table(identifier)
167167

168168
def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table:
169-
from_identifier = Catalog.identifier_to_tuple(from_identifier)
169+
from_identifier = self.identifier_to_tuple_without_catalog(from_identifier)
170170
try:
171171
table = self.__tables.pop(from_identifier)
172172
except KeyError as error:
@@ -352,6 +352,16 @@ def test_load_table(catalog: InMemoryCatalog) -> None:
352352
assert table == given_table
353353

354354

355+
def test_load_table_from_self_identifier(catalog: InMemoryCatalog) -> None:
356+
# Given
357+
given_table = given_catalog_has_a_table(catalog)
358+
# When
359+
intermediate = catalog.load_table(TEST_TABLE_IDENTIFIER)
360+
table = catalog.load_table(intermediate.identifier)
361+
# Then
362+
assert table == given_table
363+
364+
355365
def test_table_raises_error_on_table_not_found(catalog: InMemoryCatalog) -> None:
356366
with pytest.raises(NoSuchTableError, match=NO_SUCH_TABLE_ERROR):
357367
catalog.load_table(TEST_TABLE_IDENTIFIER)
@@ -367,6 +377,18 @@ def test_drop_table(catalog: InMemoryCatalog) -> None:
367377
catalog.load_table(TEST_TABLE_IDENTIFIER)
368378

369379

380+
def test_drop_table_from_self_identifier(catalog: InMemoryCatalog) -> None:
381+
# Given
382+
table = given_catalog_has_a_table(catalog)
383+
# When
384+
catalog.drop_table(table.identifier)
385+
# Then
386+
with pytest.raises(NoSuchTableError, match=NO_SUCH_TABLE_ERROR):
387+
catalog.load_table(table.identifier)
388+
with pytest.raises(NoSuchTableError, match=NO_SUCH_TABLE_ERROR):
389+
catalog.load_table(TEST_TABLE_IDENTIFIER)
390+
391+
370392
def test_drop_table_that_does_not_exist_raise_error(catalog: InMemoryCatalog) -> None:
371393
with pytest.raises(NoSuchTableError, match=NO_SUCH_TABLE_ERROR):
372394
catalog.load_table(TEST_TABLE_IDENTIFIER)
@@ -405,6 +427,31 @@ def test_rename_table(catalog: InMemoryCatalog) -> None:
405427
catalog.load_table(TEST_TABLE_IDENTIFIER)
406428

407429

430+
def test_rename_table_from_self_identifier(catalog: InMemoryCatalog) -> None:
431+
# Given
432+
table = given_catalog_has_a_table(catalog)
433+
434+
# When
435+
new_table_name = "new.namespace.new_table"
436+
new_table = catalog.rename_table(table.identifier, new_table_name)
437+
438+
# Then
439+
assert new_table.identifier == Catalog.identifier_to_tuple(new_table_name)
440+
441+
# And
442+
new_table = catalog.load_table(new_table.identifier)
443+
assert new_table.identifier == Catalog.identifier_to_tuple(new_table_name)
444+
445+
# And
446+
assert ("new", "namespace") in catalog.list_namespaces()
447+
448+
# And
449+
with pytest.raises(NoSuchTableError, match=NO_SUCH_TABLE_ERROR):
450+
catalog.load_table(table.identifier)
451+
with pytest.raises(NoSuchTableError, match=NO_SUCH_TABLE_ERROR):
452+
catalog.load_table(TEST_TABLE_IDENTIFIER)
453+
454+
408455
def test_create_namespace(catalog: InMemoryCatalog) -> None:
409456
# When
410457
catalog.create_namespace(TEST_TABLE_NAMESPACE, TEST_TABLE_PROPERTIES)

0 commit comments

Comments
 (0)