Skip to content

Fix/read parquet mergetree #22

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,20 @@ concurrency:
cancel-in-progress: true

jobs:
duckdb-next-build:
name: Build extension binaries (next)
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
with:
duckdb_version: main
ci_tools_version: main
extension_name: chsql
# Temporarily disabled because main is broken
# duckdb-next-build:
# name: Build extension binaries (next)
# uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
# with:
# duckdb_version: 1.1.2
# ci_tools_version: 1.1.2
# extension_name: chsql

duckdb-stable-build:
name: Build extension binaries
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.3
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.2.1
with:
duckdb_version: v1.1.3
ci_tools_version: v1.1.3
duckdb_version: v1.2.1
ci_tools_version: v1.2.1
extension_name: chsql

3 changes: 2 additions & 1 deletion chsql/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ set(TARGET_NAME chsql)
find_package(OpenSSL REQUIRED)
set(EXTENSION_NAME ${TARGET_NAME}_extension)
set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)
set(CHSQL_DUCKDB_VERSION ${DUCKDB_MAJOR_VERSION})
project(${TARGET_NAME})

include_directories(
Expand All @@ -21,7 +22,7 @@ include_directories(
../duckdb/third_party/mbedtls
../duckdb/third_party/mbedtls/include
../duckdb/third_party/brotli/include)
set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp src/chsql_system.cpp)
set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp src/chsql_system.cpp src/parquet_types.cpp)
build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})
# Link OpenSSL in both the static library as the loadable extension
Expand Down
2 changes: 1 addition & 1 deletion chsql/extension_config.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This file is included by DuckDB's build system. It specifies which extension to load

set(CHSQL_DUCKDB_VERSION ${DUCKDB_MAJOR_VERSION})
include_directories(
./src/include
${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include
Expand Down
55 changes: 55 additions & 0 deletions chsql/src/include/chsql_parquet_types.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
//
// Created by hromozeka on 10.03.25.
//

#ifndef PARQUET_TYPES_H
#define PARQUET_TYPES_H


#include "duckdb.hpp"
#include <parquet_types.h>

/// Describes one mapping between Parquet schema-element properties and a DuckDB logical type.
///
/// This is a polymorphic base: subclasses override check_type() and/or
/// get_logical_type() to customise matching and type derivation.
struct ParquetType {
    /// Holds a duckdb_parquet::ConvertedType::type value. Stored as a plain int
    /// (rather than the enum) so that -1 can be used as the "no data" value.
    int converted_type;
    /// Holds a duckdb_parquet::Type::type value. Stored as a plain int
    /// (rather than the enum) so that -1 can be used as the "does not matter" value.
    int parquet_type;
    /// DuckDB logical type associated with this mapping; fixed at construction.
    const duckdb::LogicalType logical_type;

    /// @param converted_type  converted-type code, or -1 for "no data"
    /// @param parquet_type    physical-type code, or -1 for "does not matter"
    /// @param logical_type    DuckDB logical type stored for this mapping
    ParquetType(int converted_type, int parquet_type, const duckdb::LogicalType &logical_type)
        : converted_type(converted_type), parquet_type(parquet_type), logical_type(logical_type) {}

    /// Virtual destructor: this type has virtual member functions and is derived
    /// from, so destroying a derived object through a ParquetType pointer must
    /// dispatch to the derived destructor instead of being undefined behaviour.
    virtual ~ParquetType() = default;

    /// Presumably reports whether the element at schema[idx] matches this
    /// mapping -- the definition is not in this header; confirm against the .cpp.
    virtual bool check_type(const duckdb::vector<duckdb_parquet::SchemaElement> &schema, idx_t idx);

    /// Returns the DuckDB logical type to use for the given schema element.
    /// NOTE(review): the base definition presumably returns `logical_type`; confirm in the .cpp.
    virtual duckdb::LogicalType get_logical_type(const duckdb_parquet::SchemaElement &schema);
};

struct LogicalParquetType : public ParquetType {
bool (*get_isset)(const duckdb_parquet::SchemaElement& el);

LogicalParquetType(bool (*get_isset) (const duckdb_parquet::SchemaElement& el),
const duckdb::LogicalType& logical_type)
: ParquetType(-1, duckdb_parquet::Type::type::INT32, logical_type), get_isset(get_isset) {}
bool check_type(const duckdb::vector<duckdb_parquet::SchemaElement> &schema, idx_t idx) override;
};

// ParquetType specialisation keyed on the JSON converted type.
struct JSONParquetType : public ParquetType {
// Initialises the base with converted_type = ConvertedType::JSON and
// parquet_type = -1. The stored logical type is SQLNULL -- presumably only a
// placeholder, since get_logical_type() is overridden below (confirm in the .cpp).
JSONParquetType(): ParquetType(duckdb_parquet::ConvertedType::JSON, -1, duckdb::LogicalType::SQLNULL) {}
// Overrides the base type derivation; the definition is not in this header.
duckdb::LogicalType get_logical_type(const duckdb_parquet::SchemaElement &schema) override;
};

// ParquetType specialisation that overrides both matching and type derivation.
// NOTE(review): the name suggests it handles Parquet DECIMAL columns -- confirm in the .cpp.
struct DecimalParquetType : public ParquetType {
// Initialises the base with converted_type = -1 and parquet_type = Type::INT32.
// The stored logical type is SQLNULL -- presumably only a placeholder, since
// get_logical_type() is overridden below (confirm in the .cpp).
DecimalParquetType(): ParquetType(-1, duckdb_parquet::Type::type::INT32, duckdb::LogicalType::SQLNULL) {}
// Overrides the base matching rule; the definition is not in this header.
bool check_type(const duckdb::vector<duckdb_parquet::SchemaElement> &schema, idx_t idx) override;
// Overrides the base type derivation; the definition is not in this header.
duckdb::LogicalType get_logical_type(const duckdb_parquet::SchemaElement &schema) override;
};

/// Entry point for resolving the DuckDB logical type of a Parquet schema element.
///
/// Only the static get_logical_type() is public; construction and instance
/// access are restricted to the class itself and its subclasses.
class ParquetTypesManager {
public:
    /// Returns the DuckDB logical type for the element at position `idx` of `schema`.
    /// The definition is not in this header.
    static duckdb::LogicalType get_logical_type(const duckdb::vector<duckdb_parquet::SchemaElement> &schema,
                                                idx_t idx);

protected:
    /// Not publicly constructible.
    ParquetTypesManager();

    /// Accessor for the shared instance pointer declared below.
    /// NOTE(review): presumably creates the instance lazily under
    /// `instance_mutex` -- confirm in the .cpp.
    static ParquetTypesManager *get_instance();

    /// Derives a logical type from a single schema element.
    /// NOTE(review): `binary_as_string` presumably selects whether binary
    /// columns are reported as strings -- confirm in the .cpp.
    duckdb::LogicalType derive_logical_type(const duckdb_parquet::SchemaElement &s_ele, bool binary_as_string);

    /// Shared instance pointer; defined outside this header.
    static ParquetTypesManager *instance;
    /// Mutex declared alongside `instance`; presumably guards its initialisation.
    static std::mutex instance_mutex;
};

#endif //PARQUET_TYPES_H
Loading
Loading