7 changes: 2 additions & 5 deletions docs/en/engines/table-engines/integrations/iceberg.md
@@ -1,6 +1,6 @@
 ---
 description: 'This engine provides a read-only integration with existing Apache Iceberg
-tables in Amazon S3, Azure, HDFS and locally stored tables.'
+tables in Amazon S3, Azure and HDFS.'
 sidebar_label: 'Iceberg'
 sidebar_position: 90
 slug: /engines/table-engines/integrations/iceberg
@@ -17,7 +17,7 @@ The Iceberg Table Engine is available but may have limitations. ClickHouse wasn'
 For optimal compatibility, we suggest using the Iceberg Table Function while we continue to improve support for the Iceberg Table Engine.
 :::
 
-This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure, HDFS and locally stored tables.
+This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure and HDFS.
 
 ## Create Table {#create-table}
 
@@ -32,9 +32,6 @@ CREATE TABLE iceberg_table_azure
 
 CREATE TABLE iceberg_table_hdfs
 ENGINE = IcebergHDFS(path_to_table, [,format] [,compression_method])
-
-CREATE TABLE iceberg_table_local
-ENGINE = IcebergLocal(path_to_table, [,format] [,compression_method])
 ```
 
 **Engine arguments**
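With `IcebergLocal` gone, the engine docs now cover only the object-storage backends. For orientation, a minimal sketch of the surviving create-and-read path, written in the style of this repo's integration tests; the endpoint, credentials, and table name are illustrative placeholders, not values from this diff:

```python
# Minimal sketch: create and read an Iceberg table via the remaining
# IcebergS3 engine. URL and credentials below are placeholders.
def create_and_read_iceberg_s3(instance, table_name="iceberg_table_s3"):
    instance.query(f"DROP TABLE IF EXISTS {table_name}")
    instance.query(
        f"""
        CREATE TABLE {table_name}
        ENGINE = IcebergS3('http://minio1:9001/root/iceberg_data/default/{table_name}/',
                           'minio', 'minio123', Parquet)
        """
    )
    return instance.query(f"SELECT count() FROM {table_name}")
```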
9 changes: 3 additions & 6 deletions docs/en/sql-reference/table-functions/iceberg.md
@@ -1,6 +1,6 @@
 ---
 description: 'Provides a read-only table-like interface to Apache Iceberg tables in
-Amazon S3, Azure, HDFS or locally stored.'
+Amazon S3, Azure or HDFS.'
 sidebar_label: 'iceberg'
 sidebar_position: 90
 slug: /sql-reference/table-functions/iceberg
@@ -9,7 +9,7 @@ title: 'iceberg'
 
 # iceberg Table Function
 
-Provides a read-only table-like interface to Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure, HDFS or locally stored.
+Provides a read-only table-like interface to Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure or HDFS.
 
 ## Syntax {#syntax}
 
@@ -22,9 +22,6 @@ icebergAzure(named_collection[, option=value [,..]])
 
 icebergHDFS(path_to_table, [,format] [,compression_method])
 icebergHDFS(named_collection[, option=value [,..]])
-
-icebergLocal(path_to_table, [,format] [,compression_method])
-icebergLocal(named_collection[, option=value [,..]])
 ```
 
 ## Arguments {#arguments}
@@ -42,7 +39,7 @@ SELECT * FROM icebergS3('http://test.s3.amazonaws.com/clickhouse-bucket/test_tab
 ```
 
 :::important
-ClickHouse currently supports reading v1 and v2 of the Iceberg format via the `icebergS3`, `icebergAzure`, `icebergHDFS` and `icebergLocal` table functions and `IcebergS3`, `icebergAzure`, `IcebergHDFS` and `IcebergLocal` table engines.
+ClickHouse currently supports reading v1 and v2 of the Iceberg format via the `icebergS3`, `icebergAzure`, `icebergHDFS` table functions and `IcebergS3`, `IcebergAzure`, `IcebergHDFS` table engines.
 :::
 
 ## Defining a named collection {#defining-a-named-collection}
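The `Defining a named collection` section below the hunk is left untouched by this PR. For completeness, a hedged sketch of driving the remaining `icebergS3` function through a named collection; the collection name, URL, and credentials are illustrative, and the final call follows the `icebergS3(named_collection[, option=value [,..]])` form from the syntax block above:

```python
# Sketch: read an Iceberg table via icebergS3 using a named collection.
# The collection name and credentials are placeholders, not from this PR.
def read_via_named_collection(instance):
    instance.query(
        """
        CREATE NAMED COLLECTION IF NOT EXISTS iceberg_conf AS
            url = 'http://test.s3.amazonaws.com/clickhouse-bucket/test_table',
            access_key_id = 'test',
            secret_access_key = 'test'
        """
    )
    return instance.query("SELECT * FROM icebergS3(iceberg_conf)")
```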
13 changes: 0 additions & 13 deletions src/Storages/ObjectStorage/registerStorageObjectStorage.cpp
@@ -231,19 +231,6 @@ void registerStorageIceberg(StorageFactory & factory)
             .has_builtin_setting_fn = StorageObjectStorageSettings::hasBuiltin,
         });
 # endif
-    factory.registerStorage(
-        "IcebergLocal",
-        [&](const StorageFactory::Arguments & args)
-        {
-            auto configuration = std::make_shared<StorageLocalIcebergConfiguration>();
-            return createStorageObjectStorage(args, configuration);
-        },
-        {
-            .supports_settings = true,
-            .supports_schema_inference = true,
-            .source_access_type = AccessType::FILE,
-            .has_builtin_setting_fn = StorageObjectStorageSettings::hasBuiltin,
-        });
 }
 
 #endif
6 changes: 0 additions & 6 deletions src/TableFunctions/TableFunctionObjectStorage.cpp
@@ -341,12 +341,6 @@ void registerTableFunctionIceberg(TableFunctionFactory & factory)
                .category{""}},
         .allow_readonly = false});
 #endif
-    factory.registerFunction<TableFunctionIcebergLocal>(
-        {.documentation
-             = {.description = R"(The table function can be used to read the Iceberg table stored locally.)",
-                .examples{{"icebergLocal", "SELECT * FROM icebergLocal(filename)", ""}},
-                .category{""}},
-         .allow_readonly = false});
 }
 #endif
 
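Together with the docs changes, the two deregistrations above are what make `icebergLocal`/`IcebergLocal` unresolvable at query time. A hedged sketch of the user-visible effect, using the integration-test harness's `query_and_get_error` helper; the exact error wording is an assumption:

```python
# After this PR, icebergLocal should no longer resolve as a table function.
# Asserting only on the function name, since the exact message may vary.
def check_iceberg_local_removed(instance):
    error = instance.query_and_get_error(
        "SELECT * FROM icebergLocal('/iceberg_data/default/test_table/')"
    )
    assert "icebergLocal" in error
```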
7 changes: 0 additions & 7 deletions src/TableFunctions/TableFunctionObjectStorage.h
@@ -83,12 +83,6 @@ struct IcebergAzureDefinition
     static constexpr auto storage_type_name = "Azure";
 };
 
-struct IcebergLocalDefinition
-{
-    static constexpr auto name = "icebergLocal";
-    static constexpr auto storage_type_name = "Local";
-};
-
 struct IcebergHDFSDefinition
 {
     static constexpr auto name = "icebergHDFS";
@@ -197,7 +191,6 @@ using TableFunctionIcebergAzure = TableFunctionObjectStorage<IcebergAzureDefinit
 # if USE_HDFS
 using TableFunctionIcebergHDFS = TableFunctionObjectStorage<IcebergHDFSDefinition, StorageHDFSIcebergConfiguration>;
 # endif
-using TableFunctionIcebergLocal = TableFunctionObjectStorage<IcebergLocalDefinition, StorageLocalIcebergConfiguration>;
 #endif
 #if USE_AWS_S3
 # if USE_PARQUET && USE_DELTA_KERNEL_RS
52 changes: 16 additions & 36 deletions tests/integration/test_storage_iceberg/test.py
@@ -258,22 +258,6 @@ def get_creation_expression(
             + settings_expression
         )
 
-    elif storage_type == "local":
-        assert not run_on_cluster
-
-        if table_function:
-            return f"""
-                icebergLocal(local, path = '/iceberg_data/default/{table_name}/', format={format})
-            """
-        else:
-            return (
-                f"""
-                DROP TABLE IF EXISTS {table_name};
-                CREATE TABLE {table_name}
-                ENGINE=IcebergLocal(local, path = '/iceberg_data/default/{table_name}/', format={format})"""
-                + settings_expression
-            )
-
     else:
         raise Exception(f"Unknown iceberg storage type: {storage_type}")
 
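After this hunk, `get_creation_expression` dispatches only on the object-storage types. A condensed sketch of the surviving shape; the `s3` and `azure` branch bodies are abridged guesses modeled on the removed `local` branch, and only the final `else` is quoted from the diff context:

```python
# Condensed, hypothetical sketch of the post-change dispatch. The real
# branches also handle run_on_cluster and the CREATE TABLE (engine) path.
def get_creation_expression_sketch(storage_type, table_name, format="Parquet"):
    if storage_type == "s3":
        return f"icebergS3(s3, path = '/iceberg_data/default/{table_name}/', format={format})"
    elif storage_type == "azure":
        return f"icebergAzure(azure, path = '/iceberg_data/default/{table_name}/', format={format})"
    else:
        raise Exception(f"Unknown iceberg storage type: {storage_type}")
```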
@@ -336,11 +320,7 @@ def create_initial_data_file(
 def default_upload_directory(
     started_cluster, storage_type, local_path, remote_path, **kwargs
 ):
-    if storage_type == "local":
-        return started_cluster.default_local_uploader.upload_directory(
-            local_path, remote_path, **kwargs
-        )
-    elif storage_type == "s3":
+    if storage_type == "s3":
         print(kwargs)
         return started_cluster.default_s3_uploader.upload_directory(
             local_path, remote_path, **kwargs
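The hunk cuts off before the helper's remaining branches. A sketch of the full post-change function, with the `azure` branch and trailing `else` reconstructed by analogy; the `default_azure_uploader` attribute is an assumption, not visible in this diff:

```python
# Post-change shape of default_upload_directory. The s3 branch mirrors the
# diff; the azure branch and the else clause are reconstructed guesses.
def default_upload_directory_sketch(
    started_cluster, storage_type, local_path, remote_path, **kwargs
):
    if storage_type == "s3":
        return started_cluster.default_s3_uploader.upload_directory(
            local_path, remote_path, **kwargs
        )
    elif storage_type == "azure":
        return started_cluster.default_azure_uploader.upload_directory(
            local_path, remote_path, **kwargs
        )
    else:
        raise Exception(f"Unknown iceberg storage type: {storage_type}")
```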
@@ -354,7 +334,7 @@ def default_upload_directory
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_single_iceberg_file(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
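The same one-line `parametrize` change repeats for every test below. Its effect is on the test matrix: pytest takes the cartesian product of stacked `parametrize` decorators, so each two-decorator test drops from six combinations (2 format versions x 3 storage types) to four:

```python
import pytest

# Stacked parametrize decorators multiply: 2 format versions x 2 storage
# types = 4 runs per test, where "local" previously made it 6.
@pytest.mark.parametrize("format_version", ["1", "2"])
@pytest.mark.parametrize("storage_type", ["s3", "azure"])
def test_matrix_example(format_version, storage_type):
    assert storage_type in ("s3", "azure")
```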
@@ -384,7 +364,7 @@ def test_single_iceberg_file(started_cluster, format_version, storage_type):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_partition_by(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -419,7 +399,7 @@ def test_partition_by(started_cluster, format_version, storage_type):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_multiple_iceberg_files(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -479,7 +459,7 @@ def test_multiple_iceberg_files(started_cluster, format_version, storage_type):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_types(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -645,7 +625,7 @@ def add_df(mode):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_delete_files(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -715,7 +695,7 @@ def test_delete_files(started_cluster, format_version, storage_type):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 @pytest.mark.parametrize("is_table_function", [False, True])
 def test_evolved_schema_simple(
     started_cluster, format_version, storage_type, is_table_function
@@ -1123,7 +1103,7 @@ def execute_spark_query(query: str):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_not_evolved_schema(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -1442,7 +1422,7 @@ def execute_spark_query(query: str):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_evolved_schema_complex(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -1535,7 +1515,7 @@ def execute_spark_query(query: str):
     assert "UNSUPPORTED_METHOD" in error
 
 
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_row_based_deletes(started_cluster, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -1572,7 +1552,7 @@ def test_row_based_deletes(started_cluster, storage_type):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_schema_inference(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -1640,7 +1620,7 @@ def test_schema_inference(started_cluster, format_version, storage_type):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_explanation(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -1695,7 +1675,7 @@ def test_explanation(started_cluster, format_version, storage_type):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_metadata_file_selection(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -1730,7 +1710,7 @@ def test_metadata_file_selection(started_cluster, format_version, storage_type):
 
 
 @pytest.mark.parametrize("format_version", ["1", "2"])
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_metadata_file_format_with_uuid(started_cluster, format_version, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session
@@ -1908,7 +1888,7 @@ def test_filesystem_cache(started_cluster, storage_type):
     )
 
 
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_partition_pruning(started_cluster, storage_type):
     if is_arm() and storage_type == "hdfs":
         pytest.skip("Disabled test IcebergHDFS for aarch64")
@@ -2129,7 +2109,7 @@ def check_validity_and_get_prunned_files(select_expression):
         == 1
     )
 
-@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure"])
 def test_explicit_metadata_file(started_cluster, storage_type):
     instance = started_cluster.instances["node1"]
     spark = started_cluster.spark_session