From 9158e54609451967c0ba46d67d06f8ad1b582309 Mon Sep 17 00:00:00 2001 From: Votre Nom Date: Sat, 31 May 2025 00:13:23 +0200 Subject: [PATCH 1/3] feat: Update duckdb and fix parquet scan --- chsql/src/duck_flock.cpp | 2 +- chsql/src/parquet_ordered_scan.cpp | 27 +++++++++++++-------------- duckdb | 2 +- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/chsql/src/duck_flock.cpp b/chsql/src/duck_flock.cpp index 79a821d..b93eaaa 100644 --- a/chsql/src/duck_flock.cpp +++ b/chsql/src/duck_flock.cpp @@ -104,7 +104,7 @@ namespace duckdb { try { if (res->TryFetch(data_chunk, error_data)) { - if (data_chunk && !data_chunk->size() == 0) { + if (data_chunk && data_chunk->size() != 0) { output.Append(*data_chunk); return; } diff --git a/chsql/src/parquet_ordered_scan.cpp b/chsql/src/parquet_ordered_scan.cpp index e41117b..81dd069 100644 --- a/chsql/src/parquet_ordered_scan.cpp +++ b/chsql/src/parquet_ordered_scan.cpp @@ -2,7 +2,7 @@ #include "duckdb/common/exception.hpp" #include #include "chsql_extension.hpp" -#include +#include #include "chsql_parquet_types.h" namespace duckdb { @@ -35,11 +35,10 @@ namespace duckdb { haveAbsentColumns = true; continue; } - columnMap.push_back(schema_column - reader->metadata->metadata->schema.begin() - 1); - reader->reader_data.column_ids.push_back( - schema_column - reader->metadata->metadata->schema.begin() - 1); - reader->reader_data.column_mapping.push_back( - it - returnCols.begin()); + columnMap.push_back(static_cast(schema_column - reader->metadata->metadata->schema.begin() - 1)); + reader->column_ids.push_back( + MultiFileLocalColumnId(static_cast(schema_column - reader->metadata->metadata->schema.begin() - 1))); + reader->column_indexes.emplace_back(static_cast(it - returnCols.begin())); } auto order_by_column_it = find_if( reader->metadata->metadata->schema.begin(), @@ -55,7 +54,7 @@ namespace duckdb { } void Scan(ClientContext& ctx) { chunk->Reset(); - reader->Scan(*scanState, *chunk); + reader->Scan(ctx, *scanState, *chunk); if (!haveAbsentColumns || chunk->size() == 0) { return; } @@ -180,7 +179,7 @@ namespace duckdb { ParquetOptions po; po.binary_as_string = true; set->reader = make_uniq(context, file, po, nullptr); - res.push_back(move(set)); + res.push_back(std::move(set)); } } @@ -189,16 +188,16 @@ namespace duckdb { Connection conn(*context.db); auto res = make_uniq(); auto files = ListValue::GetChildren(input.inputs[0]); - vector fileNames; + vector fileInfoList; for (auto & file : files) { - fileNames.push_back(file.ToString()); + fileInfoList.emplace_back(file.ToString()); } - GlobMultiFileList fileList(context, fileNames, FileGlobOptions::ALLOW_EMPTY); - string filename; + GlobMultiFileList fileList(context, fileInfoList, FileGlobOptions::ALLOW_EMPTY); + OpenFileInfo file_info; MultiFileListScanData it; fileList.InitializeScan(it); - while (fileList.Scan(it, filename)) { - res->files.push_back(filename); + while (fileList.Scan(it, file_info)) { + res->files.push_back(file_info.path); } if (res->files.empty()) { throw InvalidInputException("No files matched the provided pattern."); diff --git a/duckdb b/duckdb index 7c0f857..71c5c07 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 7c0f8574bda9af7aa5b23166d7860d68ae3b9481 +Subproject commit 71c5c07cdd295e9409c0505885033ae9eb6b5ddd From a17307ce91ffef9dd0f903a44a4454c031af7fef Mon Sep 17 00:00:00 2001 From: Votre Nom Date: Tue, 3 Jun 2025 19:35:22 +0200 Subject: [PATCH 2/3] Update MainDistributionPipeline to v1.3.0 --- .github/workflows/MainDistributionPipeline.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 9c03355..f361eb5 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -28,18 +28,18 @@ jobs: # We have to build v1.2.0 based due to go-duckdb restrictions duckdb-1-2-0-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.2.1 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.3.0 with: duckdb_version: v1.2.0 - ci_tools_version: v1.2.0 + ci_tools_version: v1.3.0 extension_name: chsql duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.2.1 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.3.0 with: - duckdb_version: v1.2.1 - ci_tools_version: v1.2.1 + duckdb_version: v1.3.0 + ci_tools_version: v1.3.0 extension_name: chsql release-all-artifacts: @@ -72,4 +72,4 @@ jobs: - name: Upload Release Assets uses: softprops/action-gh-release@v1 with: - files: to-upload/* \ No newline at end of file + files: to-upload/* From e18dfea3e71e035090f16ed3f5287b5c085f1cc8 Mon Sep 17 00:00:00 2001 From: Votre Nom Date: Tue, 3 Jun 2025 20:09:05 +0200 Subject: [PATCH 3/3] v1.3 build --- .github/workflows/MainDistributionPipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index f361eb5..452937d 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -30,7 +30,7 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.3.0 with: - duckdb_version: v1.2.0 + duckdb_version: v1.3.0 ci_tools_version: v1.3.0 extension_name: chsql @@ -44,7 +44,7 @@ jobs: release-all-artifacts: name: Process Extension Artifacts - needs: [duckdb-1-2-0-build, duckdb-stable-build] + needs: [duckdb-1-3-0-build, duckdb-stable-build] if: github.event_name == 'release' && github.event.action == 'published' runs-on: ubuntu-latest steps: