1
1
#include " clickhouse_scan.hpp"
2
2
#include " duckdb/common/exception.hpp"
3
3
#include " duckdb/main/secret/secret_manager.hpp"
4
- #include " duckdb/common/types/chunk_collection.hpp"
4
+ #include " duckdb/function/table_function.hpp"
5
+ #include " duckdb/main/client_context.hpp"
6
+ #include " duckdb/common/string_util.hpp"
7
+ #include " duckdb/main/extension_util.hpp"
5
8
#include < clickhouse/client.h>
6
9
7
10
namespace duckdb {
@@ -22,7 +25,7 @@ struct ClickHouseBindData : public TableFunctionData {
22
25
};
23
26
24
27
// Convert ClickHouse type to DuckDB LogicalType
25
- static LogicalType ConvertClickHouseType (const clickhouse::ColumnRef& column) {
28
+ static LogicalType ConvertClickHouseType (const clickhouse::ColumnRef column) {
26
29
switch (column->Type ()->GetCode ()) {
27
30
// Integer types
28
31
case clickhouse::Type::Int8:
@@ -33,8 +36,6 @@ static LogicalType ConvertClickHouseType(const clickhouse::ColumnRef& column) {
33
36
return LogicalType::INTEGER;
34
37
case clickhouse::Type::Int64:
35
38
return LogicalType::BIGINT;
36
- case clickhouse::Type::Int128:
37
- return LogicalType::HUGEINT;
38
39
39
40
// Unsigned integer types
40
41
case clickhouse::Type::UInt8:
@@ -59,31 +60,20 @@ static LogicalType ConvertClickHouseType(const clickhouse::ColumnRef& column) {
59
60
60
61
// Date and Time types
61
62
case clickhouse::Type::Date:
62
- return LogicalType::DATE;
63
63
case clickhouse::Type::Date32:
64
64
return LogicalType::DATE;
65
65
case clickhouse::Type::DateTime:
66
- return LogicalType::TIMESTAMP;
67
66
case clickhouse::Type::DateTime64:
68
67
return LogicalType::TIMESTAMP;
69
68
70
- // Boolean type
71
- case clickhouse::Type::Nothing:
72
- return LogicalType::BOOLEAN;
73
-
74
- // Decimal types
75
- case clickhouse::Type::Decimal:
76
- case clickhouse::Type::Decimal32:
77
- case clickhouse::Type::Decimal64:
78
- case clickhouse::Type::Decimal128:
79
- // Get precision and scale from the type
80
- auto decimal_type = static_cast <const clickhouse::DecimalType*>(column->Type ().get ());
81
- return LogicalType::DECIMAL (decimal_type->GetPrecision (), decimal_type->GetScale ());
69
+ // Default to VARCHAR for unsupported types
70
+ default :
71
+ return LogicalType::VARCHAR;
82
72
}
83
73
}
84
74
85
75
static void ClickHouseScanFunction (ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
86
- auto &bind_data = data_p.bind_data ->Cast <ClickHouseBindData>();
76
+ auto &bind_data = const_cast <ClickHouseBindData&>( data_p.bind_data ->Cast <ClickHouseBindData>() );
87
77
88
78
if (bind_data.finished ) {
89
79
return ;
@@ -105,192 +95,31 @@ static void ClickHouseScanFunction(ClientContext &context, TableFunctionInput &d
105
95
output.SetCardinality (row_count);
106
96
107
97
for (idx_t col_idx = 0 ; col_idx < block.GetColumnCount (); col_idx++) {
108
- auto & target = output. data [col_idx];
109
- auto & source = block [col_idx];
98
+ const auto source = block [col_idx];
99
+ auto &target = output. data [col_idx];
110
100
111
101
// Convert and copy data based on type
112
102
switch (bind_data.types [col_idx].id ()) {
113
- // String types
114
103
case LogicalTypeId::VARCHAR: {
115
- if (source->Type ()->GetCode () == clickhouse::Type::FixedString) {
116
- auto & strings = source->As <clickhouse::ColumnFixedString>();
117
- auto & target_vector = FlatVector::GetData<string_t >(target);
118
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
119
- target_vector[row_idx] = StringVector::AddString (target, strings->At (row_idx));
120
- }
121
- } else {
122
- auto & strings = source->As <clickhouse::ColumnString>();
123
- auto & target_vector = FlatVector::GetData<string_t >(target);
124
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
125
- target_vector[row_idx] = StringVector::AddString (target, strings->At (row_idx));
126
- }
127
- }
128
- break ;
129
- }
130
-
131
- // Integer types
132
- case LogicalTypeId::TINYINT: {
133
- auto & integers = source->As <clickhouse::ColumnInt8>();
134
- auto & target_vector = FlatVector::GetData<int8_t >(target);
104
+ const auto strings = source->As <clickhouse::ColumnString>();
105
+ auto target_vector = FlatVector::GetData<string_t >(target);
135
106
for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
136
- target_vector[row_idx] = integers->At (row_idx);
137
- }
138
- break ;
139
- }
140
- case LogicalTypeId::SMALLINT: {
141
- auto & integers = source->As <clickhouse::ColumnInt16>();
142
- auto & target_vector = FlatVector::GetData<int16_t >(target);
143
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
144
- target_vector[row_idx] = integers->At (row_idx);
107
+ auto sv = strings->At (row_idx);
108
+ target_vector[row_idx] = StringVector::AddString (target, sv.data (), sv.size ());
145
109
}
146
110
break ;
147
111
}
148
112
case LogicalTypeId::INTEGER: {
149
- auto & integers = source->As <clickhouse::ColumnInt32>();
150
- auto & target_vector = FlatVector::GetData<int32_t >(target);
151
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
152
- target_vector[row_idx] = integers->At (row_idx);
153
- }
154
- break ;
155
- }
156
- case LogicalTypeId::BIGINT: {
157
- auto & integers = source->As <clickhouse::ColumnInt64>();
158
- auto & target_vector = FlatVector::GetData<int64_t >(target);
159
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
160
- target_vector[row_idx] = integers->At (row_idx);
161
- }
162
- break ;
163
- }
164
- case LogicalTypeId::HUGEINT: {
165
- auto & integers = source->As <clickhouse::ColumnInt128>();
166
- auto & target_vector = FlatVector::GetData<hugeint_t >(target);
167
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
168
- // Assuming ClickHouse returns Int128 as two 64-bit integers
169
- auto value = integers->At (row_idx);
170
- target_vector[row_idx] = hugeint_t (value.high , value.low );
171
- }
172
- break ;
173
- }
174
-
175
- // Unsigned integer types
176
- case LogicalTypeId::UTINYINT: {
177
- auto & integers = source->As <clickhouse::ColumnUInt8>();
178
- auto & target_vector = FlatVector::GetData<uint8_t >(target);
179
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
180
- target_vector[row_idx] = integers->At (row_idx);
181
- }
182
- break ;
183
- }
184
- case LogicalTypeId::USMALLINT: {
185
- auto & integers = source->As <clickhouse::ColumnUInt16>();
186
- auto & target_vector = FlatVector::GetData<uint16_t >(target);
187
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
188
- target_vector[row_idx] = integers->At (row_idx);
189
- }
190
- break ;
191
- }
192
- case LogicalTypeId::UINTEGER: {
193
- auto & integers = source->As <clickhouse::ColumnUInt32>();
194
- auto & target_vector = FlatVector::GetData<uint32_t >(target);
195
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
196
- target_vector[row_idx] = integers->At (row_idx);
197
- }
198
- break ;
199
- }
200
- case LogicalTypeId::UBIGINT: {
201
- auto & integers = source->As <clickhouse::ColumnUInt64>();
202
- auto & target_vector = FlatVector::GetData<uint64_t >(target);
113
+ const auto integers = source->As <clickhouse::ColumnInt32>();
114
+ auto target_vector = FlatVector::GetData<int32_t >(target);
203
115
for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
204
116
target_vector[row_idx] = integers->At (row_idx);
205
117
}
206
118
break ;
207
119
}
208
-
209
- // Floating point types
210
- case LogicalTypeId::FLOAT: {
211
- auto & floats = source->As <clickhouse::ColumnFloat32>();
212
- auto & target_vector = FlatVector::GetData<float >(target);
213
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
214
- target_vector[row_idx] = floats->At (row_idx);
215
- }
216
- break ;
217
- }
218
- case LogicalTypeId::DOUBLE: {
219
- auto & doubles = source->As <clickhouse::ColumnFloat64>();
220
- auto & target_vector = FlatVector::GetData<double >(target);
221
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
222
- target_vector[row_idx] = doubles->At (row_idx);
223
- }
224
- break ;
225
- }
226
-
227
- // Date and Time types
228
- case LogicalTypeId::DATE: {
229
- if (source->Type ()->GetCode () == clickhouse::Type::Date32) {
230
- auto & dates = source->As <clickhouse::ColumnDate32>();
231
- auto & target_vector = FlatVector::GetData<date_t >(target);
232
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
233
- // Convert from days since epoch
234
- target_vector[row_idx] = date_t (dates->At (row_idx));
235
- }
236
- } else {
237
- auto & dates = source->As <clickhouse::ColumnDate>();
238
- auto & target_vector = FlatVector::GetData<date_t >(target);
239
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
240
- target_vector[row_idx] = date_t (dates->At (row_idx));
241
- }
242
- }
243
- break ;
244
- }
245
- case LogicalTypeId::TIMESTAMP: {
246
- if (source->Type ()->GetCode () == clickhouse::Type::DateTime64) {
247
- auto & timestamps = source->As <clickhouse::ColumnDateTime64>();
248
- auto & target_vector = FlatVector::GetData<timestamp_t >(target);
249
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
250
- // Convert from microseconds since epoch
251
- target_vector[row_idx] = timestamp_t (timestamps->At (row_idx));
252
- }
253
- } else {
254
- auto & timestamps = source->As <clickhouse::ColumnDateTime>();
255
- auto & target_vector = FlatVector::GetData<timestamp_t >(target);
256
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
257
- // Convert from seconds since epoch
258
- target_vector[row_idx] = timestamp_t (timestamps->At (row_idx) * Interval::MICROS_PER_SEC);
259
- }
260
- }
261
- break ;
262
- }
263
-
264
- // Decimal types
265
- case LogicalTypeId::DECIMAL: {
266
- switch (source->Type ()->GetCode ()) {
267
- case clickhouse::Type::Decimal32: {
268
- auto & decimals = source->As <clickhouse::ColumnDecimal32>();
269
- auto & target_vector = FlatVector::GetData<hugeint_t >(target);
270
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
271
- target_vector[row_idx] = hugeint_t (decimals->At (row_idx));
272
- }
273
- break ;
274
- }
275
- case clickhouse::Type::Decimal64: {
276
- auto & decimals = source->As <clickhouse::ColumnDecimal64>();
277
- auto & target_vector = FlatVector::GetData<hugeint_t >(target);
278
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
279
- target_vector[row_idx] = hugeint_t (decimals->At (row_idx));
280
- }
281
- break ;
282
- }
283
- case clickhouse::Type::Decimal128: {
284
- auto & decimals = source->As <clickhouse::ColumnDecimal128>();
285
- auto & target_vector = FlatVector::GetData<hugeint_t >(target);
286
- for (idx_t row_idx = 0 ; row_idx < row_count; row_idx++) {
287
- auto value = decimals->At (row_idx);
288
- target_vector[row_idx] = hugeint_t (value.high , value.low );
289
- }
290
- break ;
291
- }
292
- default :
293
- throw NotImplementedException (" Uns
120
+ // Add remaining type conversions here
121
+ default :
122
+ throw NotImplementedException (" Type not yet implemented in scan function" );
294
123
}
295
124
}
296
125
});
0 commit comments