Skip to content

Commit 38f86c8

Browse files
authored
Fix: Add projection to generate_series (#18298)
## Which issue does this PR close?

- Closes #17830

## Rationale for this change

The queries from the original ticket fail because an unprojected `generate_series` function produces the wrong number of columns in a join, which leads to a runtime error.

## What changes are included in this PR?

This adds the missing projection to `generate_series` so that only the projected columns are emitted.

## Are these changes tested?

I added a sqllogictest case. I also compared the results against Postgres and DuckDB:

Postgres:

```sql
mkleen=# SELECT v1 FROM (select generate_series as v1 from generate_series(1, 3)) g1, (select generate_series as v2 from generate_series(1, 3)) g2;
 v1
----
  1
  1
  1
  2
  2
  2
  3
  3
  3
(9 rows)
```

DuckDB:

```sql
D SELECT v1 FROM (select generate_series as v1 from generate_series(1, 3)) g1, (select generate_series as v2 from generate_series(1, 3)) g2;
┌───────┐
│  v1   │
│ int64 │
├───────┤
│     1 │
│     2 │
│     3 │
│     1 │
│     2 │
│     3 │
│     1 │
│     2 │
│     3 │
└───────┘
```

## Are there any user-facing changes?

No
1 parent e12ef3a commit 38f86c8

File tree

3 files changed

+28
-3
lines changed

3 files changed

+28
-3
lines changed

datafusion/functions-table/src/generate_series.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ impl GenerateSeriesTable {
237237
pub fn as_generator(
238238
&self,
239239
batch_size: usize,
240+
projection: Option<Vec<usize>>,
240241
) -> Result<Arc<RwLock<dyn LazyBatchGenerator>>> {
241242
let generator: Arc<RwLock<dyn LazyBatchGenerator>> = match &self.args {
242243
GenSeriesArgs::ContainsNull { name } => Arc::new(RwLock::new(Empty { name })),
@@ -255,6 +256,7 @@ impl GenerateSeriesTable {
255256
batch_size,
256257
include_end: *include_end,
257258
name,
259+
projection,
258260
})),
259261
GenSeriesArgs::TimestampArgs {
260262
start,
@@ -295,6 +297,7 @@ impl GenerateSeriesTable {
295297
batch_size,
296298
include_end: *include_end,
297299
name,
300+
projection,
298301
}))
299302
}
300303
GenSeriesArgs::DateArgs {
@@ -324,6 +327,7 @@ impl GenerateSeriesTable {
324327
batch_size,
325328
include_end: *include_end,
326329
name,
330+
projection,
327331
})),
328332
};
329333

@@ -341,6 +345,7 @@ pub struct GenericSeriesState<T: SeriesValue> {
341345
current: T,
342346
include_end: bool,
343347
name: &'static str,
348+
projection: Option<Vec<usize>>,
344349
}
345350

346351
impl<T: SeriesValue> GenericSeriesState<T> {
@@ -396,7 +401,11 @@ impl<T: SeriesValue> LazyBatchGenerator for GenericSeriesState<T> {
396401

397402
let array = self.current.create_array(buf)?;
398403
let batch = RecordBatch::try_new(Arc::clone(&self.schema), vec![array])?;
399-
Ok(Some(batch))
404+
let projected = match self.projection.as_ref() {
405+
Some(projection) => batch.project(projection)?,
406+
None => batch,
407+
};
408+
Ok(Some(projected))
400409
}
401410
}
402411

@@ -477,7 +486,7 @@ impl TableProvider for GenerateSeriesTable {
477486
None => self.schema(),
478487
};
479488

480-
let generator = self.as_generator(batch_size)?;
489+
let generator = self.as_generator(batch_size, projection.cloned())?;
481490

482491
Ok(Arc::new(LazyMemoryExec::try_new(schema, vec![generator])?))
483492
}

datafusion/proto/src/physical_plan/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1940,7 +1940,8 @@ impl protobuf::PhysicalPlanNode {
19401940
};
19411941

19421942
let table = GenerateSeriesTable::new(Arc::clone(&schema), args);
1943-
let generator = table.as_generator(generate_series.target_batch_size as usize)?;
1943+
let generator =
1944+
table.as_generator(generate_series.target_batch_size as usize, None)?;
19441945

19451946
Ok(Arc::new(LazyMemoryExec::try_new(schema, vec![generator])?))
19461947
}

datafusion/sqllogictest/test_files/table_functions.slt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,21 @@ SELECT generate_series(1, t1.end) FROM generate_series(3, 5) as t1(end)
188188
[1, 2, 3, 4]
189189
[1, 2, 3]
190190

191+
# join with projection on generate_series
192+
query I
193+
select g1.value from generate_series(1, 3) g1 CROSS JOIN generate_series(1, 3) g2;
194+
----
195+
1
196+
1
197+
1
198+
2
199+
2
200+
2
201+
3
202+
3
203+
3
204+
205+
191206
# Test range table function
192207
query I
193208
SELECT * FROM range(6)

0 commit comments

Comments
 (0)