Skip to content

Commit 114355c

Browse files
committed
Scan until we get a non-empty partition, then use those dimension columns.
1 parent 7e9a7b2 commit 114355c

File tree

1 file changed

+13
-5
lines changed

1 file changed

+13
-5
lines changed

src/lib.rs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -168,13 +168,21 @@ impl PrunableStreamingTable {
168168
schema: SchemaRef,
169169
partitions: Vec<(Arc<dyn PartitionStream>, PartitionMetadata)>,
170170
) -> Self {
171-
// Collect dimension column names across ALL partitions so that a
172-
// zero-length first partition (empty ranges map) doesn't silently
173-
// disable pruning for the entire table.
171+
// Collect dimension column names from the first partition that has
172+
// non-empty metadata. All partitions share the same dimension names,
173+
// so one representative suffices. find_map stops at the first non-empty
174+
// partition: O(D) plus a skip over any leading empty partitions, rather than
175+
// O(N × D). This matters when N is in the hundreds of thousands (e.g. hourly chunks of a decades-long climate dataset).
174176
let dimension_columns: std::collections::HashSet<String> = partitions
175177
.iter()
176-
.flat_map(|(_, meta)| meta.ranges.keys().cloned())
177-
.collect();
178+
.find_map(|(_, meta)| {
179+
if meta.ranges.is_empty() {
180+
None
181+
} else {
182+
Some(meta.ranges.keys().cloned().collect())
183+
}
184+
})
185+
.unwrap_or_default();
178186

179187
Self {
180188
schema,

0 commit comments

Comments
 (0)