diff --git a/docs/en/engines/table-engines/integrations/iceberg.md b/docs/en/engines/table-engines/integrations/iceberg.md
index ffe121665d4b..3cba2b54e2a4 100644
--- a/docs/en/engines/table-engines/integrations/iceberg.md
+++ b/docs/en/engines/table-engines/integrations/iceberg.md
@@ -1,6 +1,6 @@
 ---
 description: 'This engine provides a read-only integration with existing Apache Iceberg
-  tables in Amazon S3, Azure, HDFS and locally stored tables.'
+  tables in Amazon S3, Azure and HDFS.'
 sidebar_label: 'Iceberg'
 sidebar_position: 90
 slug: /engines/table-engines/integrations/iceberg
@@ -17,7 +17,7 @@ The Iceberg Table Engine is available but may have limitations. ClickHouse wasn'
 For optimal compatibility, we suggest using the Iceberg Table Function while we continue to improve support for the Iceberg Table Engine.
 :::
 
-This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure, HDFS and locally stored tables.
+This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure and HDFS.
 
 ## Create Table {#create-table}
 
@@ -32,9 +32,6 @@ CREATE TABLE iceberg_table_azure
 
 CREATE TABLE iceberg_table_hdfs
     ENGINE = IcebergHDFS(path_to_table, [,format] [,compression_method])
-
-CREATE TABLE iceberg_table_local
-    ENGINE = IcebergLocal(path_to_table, [,format] [,compression_method])
 ```
 
 **Engine arguments**
diff --git a/docs/en/sql-reference/table-functions/iceberg.md b/docs/en/sql-reference/table-functions/iceberg.md
index 0df71713faae..8be7b1203a0a 100644
--- a/docs/en/sql-reference/table-functions/iceberg.md
+++ b/docs/en/sql-reference/table-functions/iceberg.md
@@ -1,6 +1,6 @@
 ---
 description: 'Provides a read-only table-like interface to Apache Iceberg tables in
-  Amazon S3, Azure, HDFS or locally stored.'
+  Amazon S3, Azure or HDFS.'
 sidebar_label: 'iceberg'
 sidebar_position: 90
 slug: /sql-reference/table-functions/iceberg
@@ -9,7 +9,7 @@ title: 'iceberg'
 
 # iceberg Table Function
 
-Provides a read-only table-like interface to Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure, HDFS or locally stored.
+Provides a read-only table-like interface to Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure or HDFS.
 
 ## Syntax {#syntax}
 
@@ -22,9 +22,6 @@ icebergAzure(named_collection[, option=value [,..]])
 
 icebergHDFS(path_to_table, [,format] [,compression_method])
 icebergHDFS(named_collection[, option=value [,..]])
-
-icebergLocal(path_to_table, [,format] [,compression_method])
-icebergLocal(named_collection[, option=value [,..]])
 ```
 
 ## Arguments {#arguments}
@@ -42,7 +39,7 @@ SELECT * FROM icebergS3('http://test.s3.amazonaws.com/clickhouse-bucket/test_tab
 ```
 
 :::important
-ClickHouse currently supports reading v1 and v2 of the Iceberg format via the `icebergS3`, `icebergAzure`, `icebergHDFS` and `icebergLocal` table functions and `IcebergS3`, `icebergAzure`, `IcebergHDFS` and `IcebergLocal` table engines.
+ClickHouse currently supports reading v1 and v2 of the Iceberg format via the `icebergS3`, `icebergAzure` and `icebergHDFS` table functions, and the `IcebergS3`, `IcebergAzure` and `IcebergHDFS` table engines.
 :::
 
 ## Defining a named collection {#defining-a-named-collection}
diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp
index f62b9cae37f0..c3703529179a 100644
--- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp
+++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp
@@ -231,19 +231,6 @@ void registerStorageIceberg(StorageFactory & factory)
             .has_builtin_setting_fn = StorageObjectStorageSettings::hasBuiltin,
         });
 #    endif
-    factory.registerStorage(
-        "IcebergLocal",
-        [&](const StorageFactory::Arguments & args)
-        {
-            auto configuration = std::make_shared<StorageLocalIcebergConfiguration>();
-            return createStorageObjectStorage(args, configuration);
-        },
-        {
-            .supports_settings = true,
-            .supports_schema_inference = true,
-            .source_access_type = AccessType::FILE,
-            .has_builtin_setting_fn = StorageObjectStorageSettings::hasBuiltin,
-        });
 }
 #endif
diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp
index 9d95cb73877b..b527587a1167 100644
--- a/src/TableFunctions/TableFunctionObjectStorage.cpp
+++ b/src/TableFunctions/TableFunctionObjectStorage.cpp
@@ -341,12 +341,6 @@ void registerTableFunctionIceberg(TableFunctionFactory & factory)
                 .category{""}},
             .allow_readonly = false});
 #endif
-    factory.registerFunction<TableFunctionIcebergLocal>(
-        {.documentation
-             = {.description = R"(The table function can be used to read the Iceberg table stored locally.)",
-                .examples{{"icebergLocal", "SELECT * FROM icebergLocal(filename)", ""}},
-                .category{""}},
-         .allow_readonly = false});
 }
 #endif
diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h
index 94d3542ad48f..311353c3ed48 100644
--- a/src/TableFunctions/TableFunctionObjectStorage.h
+++ b/src/TableFunctions/TableFunctionObjectStorage.h
@@ -83,12 +83,6 @@ struct IcebergAzureDefinition
     static constexpr auto storage_type_name = "Azure";
 };
 
-struct IcebergLocalDefinition
-{
-    static constexpr auto name = "icebergLocal";
-    static constexpr auto storage_type_name = "Local";
-};
-
 struct IcebergHDFSDefinition
 {
     static constexpr auto name = "icebergHDFS";
@@ -197,7 +191,6 @@ using TableFunctionIcebergAzure = TableFunctionObjectStorage<IcebergAzureDefinition, StorageAzureIcebergConfiguration>;
 #    endif
-using TableFunctionIcebergLocal = TableFunctionObjectStorage<IcebergLocalDefinition, StorageLocalIcebergConfiguration>;
 #endif
 #if USE_AWS_S3
 #    if USE_PARQUET && USE_DELTA_KERNEL_RS
diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py
index b429b1349514..3d4b4303f3ba 100644
--- a/tests/integration/test_storage_iceberg/test.py
+++ b/tests/integration/test_storage_iceberg/test.py
@@ -258,22 +258,6 @@ def get_creation_expression(
             + settings_expression
         )
 
-    elif storage_type == "local":
-        assert not run_on_cluster
-
-        if table_function:
-            return f"""
-            icebergLocal(local, path = '/iceberg_data/default/{table_name}/', format={format})
-            """
-        else:
-            return (
-                f"""
-                DROP TABLE IF EXISTS {table_name};
-                CREATE TABLE {table_name}
-                ENGINE=IcebergLocal(local, path = '/iceberg_data/default/{table_name}/', format={format})"""
-                + settings_expression
-            )
-
     else:
         raise Exception(f"Unknown iceberg storage type: {storage_type}")
@@ -336,11 +320,7 @@ def create_initial_data_file(
 def default_upload_directory(
     started_cluster, storage_type, local_path, remote_path, **kwargs
 ):
-    if storage_type == "local":
-        return started_cluster.default_local_uploader.upload_directory(
-            local_path, remote_path, **kwargs
-        )
-    elif storage_type == "s3":
+    if storage_type == "s3":
         print(kwargs)
         return started_cluster.default_s3_uploader.upload_directory(
             local_path, remote_path, **kwargs
         )
@@ -354,7 +334,7 @@ def default_upload_directory(
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_single_iceberg_file(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -384,7 +364,7 @@ def test_single_iceberg_file(started_cluster, format_version, storage_type):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_partition_by(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -419,7 +399,7 @@ def test_partition_by(started_cluster, format_version, storage_type):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_multiple_iceberg_files(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -479,7 +459,7 @@ def test_multiple_iceberg_files(started_cluster, format_version, storage_type):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_types(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -645,7 +625,7 @@ def add_df(mode):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_delete_files(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -715,7 +695,7 @@ def test_delete_files(started_cluster, format_version, storage_type):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 @pytest.mark.parametrize("is_table_function", [False, True])
 def test_evolved_schema_simple(
     started_cluster, format_version, storage_type, is_table_function
@@ -1123,7 +1103,7 @@ def execute_spark_query(query: str):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_not_evolved_schema(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -1442,7 +1422,7 @@ def execute_spark_query(query: str):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_evolved_schema_complex(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -1535,7 +1515,7 @@ def execute_spark_query(query: str):
     assert "UNSUPPORTED_METHOD" in error
 
 
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
"azure", "local"]) +@pytest.mark.parametrize("storage_type", ["s3", "azure"]) def test_row_based_deletes(started_cluster, storage_type): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session @@ -1572,7 +1552,7 @@ def test_row_based_deletes(started_cluster, storage_type): @pytest.mark.parametrize("format_version", ["1", "2"]) -@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"]) +@pytest.mark.parametrize("storage_type", ["s3", "azure"]) def test_schema_inference(started_cluster, format_version, storage_type): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session @@ -1640,7 +1620,7 @@ def test_schema_inference(started_cluster, format_version, storage_type): @pytest.mark.parametrize("format_version", ["1", "2"]) -@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"]) +@pytest.mark.parametrize("storage_type", ["s3", "azure"]) def test_explanation(started_cluster, format_version, storage_type): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session @@ -1695,7 +1675,7 @@ def test_explanation(started_cluster, format_version, storage_type): @pytest.mark.parametrize("format_version", ["1", "2"]) -@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"]) +@pytest.mark.parametrize("storage_type", ["s3", "azure"]) def test_metadata_file_selection(started_cluster, format_version, storage_type): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session @@ -1730,7 +1710,7 @@ def test_metadata_file_selection(started_cluster, format_version, storage_type): @pytest.mark.parametrize("format_version", ["1", "2"]) -@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"]) +@pytest.mark.parametrize("storage_type", ["s3", "azure"]) def test_metadata_file_format_with_uuid(started_cluster, format_version, storage_type): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session @@ -1908,7 +1888,7 @@ def test_filesystem_cache(started_cluster, storage_type): ) -@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"]) +@pytest.mark.parametrize("storage_type", ["s3", "azure"]) def test_partition_pruning(started_cluster, storage_type): if is_arm() and storage_type == "hdfs": pytest.skip("Disabled test IcebergHDFS for aarch64") @@ -2129,7 +2109,7 @@ def check_validity_and_get_prunned_files(select_expression): == 1 ) -@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"]) +@pytest.mark.parametrize("storage_type", ["s3", "azure"]) def test_explicit_metadata_file(started_cluster, storage_type): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session