Skip to content

Commit 5b97d41

Browse files
committed
patch scanner
1 parent 95d47da commit 5b97d41

File tree

1 file changed

+20
-191
lines changed

1 file changed

+20
-191
lines changed

chsql/src/clickhouse_scan.cpp

Lines changed: 20 additions & 191 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
#include "clickhouse_scan.hpp"
22
#include "duckdb/common/exception.hpp"
33
#include "duckdb/main/secret/secret_manager.hpp"
4-
#include "duckdb/common/types/chunk_collection.hpp"
4+
#include "duckdb/function/table_function.hpp"
5+
#include "duckdb/main/client_context.hpp"
6+
#include "duckdb/common/string_util.hpp"
7+
#include "duckdb/main/extension_util.hpp"
58
#include <clickhouse/client.h>
69

710
namespace duckdb {
@@ -22,7 +25,7 @@ struct ClickHouseBindData : public TableFunctionData {
2225
};
2326

2427
// Convert ClickHouse type to DuckDB LogicalType
25-
static LogicalType ConvertClickHouseType(const clickhouse::ColumnRef& column) {
28+
static LogicalType ConvertClickHouseType(const clickhouse::ColumnRef column) {
2629
switch (column->Type()->GetCode()) {
2730
// Integer types
2831
case clickhouse::Type::Int8:
@@ -33,8 +36,6 @@ static LogicalType ConvertClickHouseType(const clickhouse::ColumnRef& column) {
3336
return LogicalType::INTEGER;
3437
case clickhouse::Type::Int64:
3538
return LogicalType::BIGINT;
36-
case clickhouse::Type::Int128:
37-
return LogicalType::HUGEINT;
3839

3940
// Unsigned integer types
4041
case clickhouse::Type::UInt8:
@@ -59,31 +60,20 @@ static LogicalType ConvertClickHouseType(const clickhouse::ColumnRef& column) {
5960

6061
// Date and Time types
6162
case clickhouse::Type::Date:
62-
return LogicalType::DATE;
6363
case clickhouse::Type::Date32:
6464
return LogicalType::DATE;
6565
case clickhouse::Type::DateTime:
66-
return LogicalType::TIMESTAMP;
6766
case clickhouse::Type::DateTime64:
6867
return LogicalType::TIMESTAMP;
6968

70-
// Boolean type
71-
case clickhouse::Type::Nothing:
72-
return LogicalType::BOOLEAN;
73-
74-
// Decimal types
75-
case clickhouse::Type::Decimal:
76-
case clickhouse::Type::Decimal32:
77-
case clickhouse::Type::Decimal64:
78-
case clickhouse::Type::Decimal128:
79-
// Get precision and scale from the type
80-
auto decimal_type = static_cast<const clickhouse::DecimalType*>(column->Type().get());
81-
return LogicalType::DECIMAL(decimal_type->GetPrecision(), decimal_type->GetScale());
69+
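// Default to VARCHAR for unsupported types. NOTE(review): the scan's VARCHAR branch reads every such column via As<clickhouse::ColumnString>(), so non-string columns presumably need an explicit conversion there — confirm.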
70+
default:
71+
return LogicalType::VARCHAR;
8272
}
8373
}
8474

8575
static void ClickHouseScanFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
86-
auto &bind_data = data_p.bind_data->Cast<ClickHouseBindData>();
76+
auto &bind_data = const_cast<ClickHouseBindData&>(data_p.bind_data->Cast<ClickHouseBindData>());
8777

8878
if (bind_data.finished) {
8979
return;
@@ -105,192 +95,31 @@ static void ClickHouseScanFunction(ClientContext &context, TableFunctionInput &d
10595
output.SetCardinality(row_count);
10696

10797
for (idx_t col_idx = 0; col_idx < block.GetColumnCount(); col_idx++) {
108-
auto& target = output.data[col_idx];
109-
auto& source = block[col_idx];
98+
const auto source = block[col_idx];
99+
auto &target = output.data[col_idx];
110100

111101
// Convert and copy data based on type
112102
switch (bind_data.types[col_idx].id()) {
113-
// String types
114103
case LogicalTypeId::VARCHAR: {
115-
if (source->Type()->GetCode() == clickhouse::Type::FixedString) {
116-
auto& strings = source->As<clickhouse::ColumnFixedString>();
117-
auto& target_vector = FlatVector::GetData<string_t>(target);
118-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
119-
target_vector[row_idx] = StringVector::AddString(target, strings->At(row_idx));
120-
}
121-
} else {
122-
auto& strings = source->As<clickhouse::ColumnString>();
123-
auto& target_vector = FlatVector::GetData<string_t>(target);
124-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
125-
target_vector[row_idx] = StringVector::AddString(target, strings->At(row_idx));
126-
}
127-
}
128-
break;
129-
}
130-
131-
// Integer types
132-
case LogicalTypeId::TINYINT: {
133-
auto& integers = source->As<clickhouse::ColumnInt8>();
134-
auto& target_vector = FlatVector::GetData<int8_t>(target);
104+
const auto strings = source->As<clickhouse::ColumnString>();
105+
auto target_vector = FlatVector::GetData<string_t>(target);
135106
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
136-
target_vector[row_idx] = integers->At(row_idx);
137-
}
138-
break;
139-
}
140-
case LogicalTypeId::SMALLINT: {
141-
auto& integers = source->As<clickhouse::ColumnInt16>();
142-
auto& target_vector = FlatVector::GetData<int16_t>(target);
143-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
144-
target_vector[row_idx] = integers->At(row_idx);
107+
auto sv = strings->At(row_idx);
108+
target_vector[row_idx] = StringVector::AddString(target, sv.data(), sv.size());
145109
}
146110
break;
147111
}
148112
case LogicalTypeId::INTEGER: {
149-
auto& integers = source->As<clickhouse::ColumnInt32>();
150-
auto& target_vector = FlatVector::GetData<int32_t>(target);
151-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
152-
target_vector[row_idx] = integers->At(row_idx);
153-
}
154-
break;
155-
}
156-
case LogicalTypeId::BIGINT: {
157-
auto& integers = source->As<clickhouse::ColumnInt64>();
158-
auto& target_vector = FlatVector::GetData<int64_t>(target);
159-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
160-
target_vector[row_idx] = integers->At(row_idx);
161-
}
162-
break;
163-
}
164-
case LogicalTypeId::HUGEINT: {
165-
auto& integers = source->As<clickhouse::ColumnInt128>();
166-
auto& target_vector = FlatVector::GetData<hugeint_t>(target);
167-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
168-
// Assuming ClickHouse returns Int128 as two 64-bit integers
169-
auto value = integers->At(row_idx);
170-
target_vector[row_idx] = hugeint_t(value.high, value.low);
171-
}
172-
break;
173-
}
174-
175-
// Unsigned integer types
176-
case LogicalTypeId::UTINYINT: {
177-
auto& integers = source->As<clickhouse::ColumnUInt8>();
178-
auto& target_vector = FlatVector::GetData<uint8_t>(target);
179-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
180-
target_vector[row_idx] = integers->At(row_idx);
181-
}
182-
break;
183-
}
184-
case LogicalTypeId::USMALLINT: {
185-
auto& integers = source->As<clickhouse::ColumnUInt16>();
186-
auto& target_vector = FlatVector::GetData<uint16_t>(target);
187-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
188-
target_vector[row_idx] = integers->At(row_idx);
189-
}
190-
break;
191-
}
192-
case LogicalTypeId::UINTEGER: {
193-
auto& integers = source->As<clickhouse::ColumnUInt32>();
194-
auto& target_vector = FlatVector::GetData<uint32_t>(target);
195-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
196-
target_vector[row_idx] = integers->At(row_idx);
197-
}
198-
break;
199-
}
200-
case LogicalTypeId::UBIGINT: {
201-
auto& integers = source->As<clickhouse::ColumnUInt64>();
202-
auto& target_vector = FlatVector::GetData<uint64_t>(target);
113+
const auto integers = source->As<clickhouse::ColumnInt32>();
114+
auto target_vector = FlatVector::GetData<int32_t>(target);
203115
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
204116
target_vector[row_idx] = integers->At(row_idx);
205117
}
206118
break;
207119
}
208-
209-
// Floating point types
210-
case LogicalTypeId::FLOAT: {
211-
auto& floats = source->As<clickhouse::ColumnFloat32>();
212-
auto& target_vector = FlatVector::GetData<float>(target);
213-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
214-
target_vector[row_idx] = floats->At(row_idx);
215-
}
216-
break;
217-
}
218-
case LogicalTypeId::DOUBLE: {
219-
auto& doubles = source->As<clickhouse::ColumnFloat64>();
220-
auto& target_vector = FlatVector::GetData<double>(target);
221-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
222-
target_vector[row_idx] = doubles->At(row_idx);
223-
}
224-
break;
225-
}
226-
227-
// Date and Time types
228-
case LogicalTypeId::DATE: {
229-
if (source->Type()->GetCode() == clickhouse::Type::Date32) {
230-
auto& dates = source->As<clickhouse::ColumnDate32>();
231-
auto& target_vector = FlatVector::GetData<date_t>(target);
232-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
233-
// Convert from days since epoch
234-
target_vector[row_idx] = date_t(dates->At(row_idx));
235-
}
236-
} else {
237-
auto& dates = source->As<clickhouse::ColumnDate>();
238-
auto& target_vector = FlatVector::GetData<date_t>(target);
239-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
240-
target_vector[row_idx] = date_t(dates->At(row_idx));
241-
}
242-
}
243-
break;
244-
}
245-
case LogicalTypeId::TIMESTAMP: {
246-
if (source->Type()->GetCode() == clickhouse::Type::DateTime64) {
247-
auto& timestamps = source->As<clickhouse::ColumnDateTime64>();
248-
auto& target_vector = FlatVector::GetData<timestamp_t>(target);
249-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
250-
// Convert from microseconds since epoch
251-
target_vector[row_idx] = timestamp_t(timestamps->At(row_idx));
252-
}
253-
} else {
254-
auto& timestamps = source->As<clickhouse::ColumnDateTime>();
255-
auto& target_vector = FlatVector::GetData<timestamp_t>(target);
256-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
257-
// Convert from seconds since epoch
258-
target_vector[row_idx] = timestamp_t(timestamps->At(row_idx) * Interval::MICROS_PER_SEC);
259-
}
260-
}
261-
break;
262-
}
263-
264-
// Decimal types
265-
case LogicalTypeId::DECIMAL: {
266-
switch (source->Type()->GetCode()) {
267-
case clickhouse::Type::Decimal32: {
268-
auto& decimals = source->As<clickhouse::ColumnDecimal32>();
269-
auto& target_vector = FlatVector::GetData<hugeint_t>(target);
270-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
271-
target_vector[row_idx] = hugeint_t(decimals->At(row_idx));
272-
}
273-
break;
274-
}
275-
case clickhouse::Type::Decimal64: {
276-
auto& decimals = source->As<clickhouse::ColumnDecimal64>();
277-
auto& target_vector = FlatVector::GetData<hugeint_t>(target);
278-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
279-
target_vector[row_idx] = hugeint_t(decimals->At(row_idx));
280-
}
281-
break;
282-
}
283-
case clickhouse::Type::Decimal128: {
284-
auto& decimals = source->As<clickhouse::ColumnDecimal128>();
285-
auto& target_vector = FlatVector::GetData<hugeint_t>(target);
286-
for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
287-
auto value = decimals->At(row_idx);
288-
target_vector[row_idx] = hugeint_t(value.high, value.low);
289-
}
290-
break;
291-
}
292-
default:
293-
throw NotImplementedException("Uns
120+
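// TODO: add conversions for the other types ConvertClickHouseType can return (e.g. BIGINT, DATE, TIMESTAMP); until then they fall through to the default below and throw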
121+
default:
122+
throw NotImplementedException("Type not yet implemented in scan function");
294123
}
295124
}
296125
});

0 commit comments

Comments
 (0)