Skip to content

Commit bddec49

Browse files
committed
Allow configuring parquet_bloom_filter_columns in Iceberg
1 parent c6c77cf commit bddec49

File tree

3 files changed

+40
-1
lines changed

3 files changed

+40
-1
lines changed

plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java

+20
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,7 @@
133133
import org.apache.iceberg.DataFiles;
134134
import org.apache.iceberg.DeleteFile;
135135
import org.apache.iceberg.DeleteFiles;
136+
import org.apache.iceberg.FileFormat;
136137
import org.apache.iceberg.FileMetadata;
137138
import org.apache.iceberg.FileScanTask;
138139
import org.apache.iceberg.IsolationLevel;
@@ -281,12 +282,14 @@
281282
import static io.trino.plugin.iceberg.IcebergTableProperties.FILE_FORMAT_PROPERTY;
282283
import static io.trino.plugin.iceberg.IcebergTableProperties.FORMAT_VERSION_PROPERTY;
283284
import static io.trino.plugin.iceberg.IcebergTableProperties.OBJECT_STORE_LAYOUT_ENABLED_PROPERTY;
285+
import static io.trino.plugin.iceberg.IcebergTableProperties.PARQUET_BLOOM_FILTER_COLUMNS_PROPERTY;
284286
import static io.trino.plugin.iceberg.IcebergTableProperties.PARTITIONING_PROPERTY;
285287
import static io.trino.plugin.iceberg.IcebergTableProperties.SORTED_BY_PROPERTY;
286288
import static io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning;
287289
import static io.trino.plugin.iceberg.IcebergTableProperties.getTableLocation;
288290
import static io.trino.plugin.iceberg.IcebergUtil.buildPath;
289291
import static io.trino.plugin.iceberg.IcebergUtil.canEnforceColumnConstraintInSpecs;
292+
import static io.trino.plugin.iceberg.IcebergUtil.checkFormatForProperty;
290293
import static io.trino.plugin.iceberg.IcebergUtil.commit;
291294
import static io.trino.plugin.iceberg.IcebergUtil.createColumnHandle;
292295
import static io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue;
@@ -370,6 +373,7 @@
370373
import static org.apache.iceberg.TableProperties.DELETE_ISOLATION_LEVEL_DEFAULT;
371374
import static org.apache.iceberg.TableProperties.FORMAT_VERSION;
372375
import static org.apache.iceberg.TableProperties.OBJECT_STORE_ENABLED;
376+
import static org.apache.iceberg.TableProperties.PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX;
373377
import static org.apache.iceberg.TableProperties.WRITE_DATA_LOCATION;
374378
import static org.apache.iceberg.TableProperties.WRITE_LOCATION_PROVIDER_IMPL;
375379
import static org.apache.iceberg.expressions.Expressions.alwaysTrue;
@@ -392,6 +396,7 @@ public class IcebergMetadata
392396
.add(FORMAT_VERSION_PROPERTY)
393397
.add(OBJECT_STORE_LAYOUT_ENABLED_PROPERTY)
394398
.add(DATA_LOCATION_PROPERTY)
399+
.add(PARQUET_BLOOM_FILTER_COLUMNS_PROPERTY)
395400
.add(PARTITIONING_PROPERTY)
396401
.add(SORTED_BY_PROPERTY)
397402
.build();
@@ -2273,6 +2278,21 @@ public void setTableProperties(ConnectorSession session, ConnectorTableHandle ta
22732278
extraProperties.forEach(updateProperties::set);
22742279
}
22752280

2281+
if (properties.containsKey(PARQUET_BLOOM_FILTER_COLUMNS_PROPERTY)) {
2282+
checkFormatForProperty(getFileFormat(icebergTable).toIceberg(), FileFormat.PARQUET, PARQUET_BLOOM_FILTER_COLUMNS_PROPERTY);
2283+
//noinspection unchecked
2284+
List<String> parquetBloomFilterColumns = (List<String>) properties.get(PARQUET_BLOOM_FILTER_COLUMNS_PROPERTY)
2285+
.orElseThrow(() -> new IllegalArgumentException("The parquet_bloom_filter_columns property cannot be empty"));
2286+
if (parquetBloomFilterColumns.isEmpty()) {
2287+
icebergTable.properties().keySet().stream()
2288+
.filter(key -> key.startsWith(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX))
2289+
.forEach(updateProperties::remove);
2290+
}
2291+
else {
2292+
parquetBloomFilterColumns.forEach(column -> updateProperties.set(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + column, "true"));
2293+
}
2294+
}
2295+
22762296
if (properties.containsKey(FILE_FORMAT_PROPERTY)) {
22772297
IcebergFileFormat fileFormat = (IcebergFileFormat) properties.get(FILE_FORMAT_PROPERTY)
22782298
.orElseThrow(() -> new IllegalArgumentException("The format property cannot be empty"));

plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -986,7 +986,7 @@ public static long getSnapshotIdAsOfTime(Table table, long epochMillis)
986986
.snapshotId();
987987
}
988988

989-
private static void checkFormatForProperty(FileFormat actualStorageFormat, FileFormat expectedStorageFormat, String propertyName)
989+
public static void checkFormatForProperty(FileFormat actualStorageFormat, FileFormat expectedStorageFormat, String propertyName)
990990
{
991991
if (actualStorageFormat != expectedStorageFormat) {
992992
throw new TrinoException(INVALID_TABLE_PROPERTY, format("Cannot specify %s table property for storage format: %s", propertyName, actualStorageFormat));

plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetWithBloomFilters.java

+19
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,25 @@ public void testBloomFilterPropertiesArePersistedDuringCreate()
5858
"format = 'parquet'," +
5959
"parquet_bloom_filter_columns = array['a','B'])");
6060

61+
verifyTableProperties(tableName);
62+
}
63+
64+
// Exercises ALTER TABLE ... SET PROPERTIES for parquet_bloom_filter_columns:
// first setting the property on an existing table, then clearing it with an empty array.
@Test
65+
void testBloomFilterPropertiesArePersistedDuringSetProperties()
66+
{
67+
String tableName = "test_metadata_write_properties_" + randomNameSuffix();
68+
// The table is created without any bloom filter property; column names are declared in mixed case (A, b, c).
assertQuerySucceeds("CREATE TABLE " + tableName + "(A bigint, b bigint, c bigint)");
69+
70+
// Set the property after creation, using a different letter case than the column declarations ('a', 'B').
assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES parquet_bloom_filter_columns = ARRAY['a','B']");
71+
// Shared check, also used by the create-time test, that reads the "$properties" metadata table.
verifyTableProperties(tableName);
72+
73+
// An empty array is expected to clear the setting, so it must no longer appear in SHOW CREATE TABLE.
assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES parquet_bloom_filter_columns = ARRAY[]");
74+
assertThat((String) computeScalar("SHOW CREATE TABLE " + tableName))
75+
.doesNotContain("parquet_bloom_filter_columns");
76+
}
77+
78+
private void verifyTableProperties(String tableName)
79+
{
6180
MaterializedResult actualProperties = computeActual("SELECT * FROM \"" + tableName + "$properties\"");
6281
assertThat(actualProperties).isNotNull();
6382
MaterializedResult expectedProperties = resultBuilder(getSession())

0 commit comments

Comments
 (0)