Skip to content

Commit 1c6461b

Browse files
Updates for DuckDB 1.4.2
Signed-off-by: David Christensen <[email protected]>
1 parent 2c7287f commit 1c6461b

30 files changed

+854
-468
lines changed

Pipfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ boto3 = "~=1.40"
99
dbt-core = "~=1.10"
1010
dbt-postgres = "~=1.9"
1111
deepdiff = "~=8.6"
12-
duckdb = "==1.3.2"
1312
flask = "~=3.1"
1413
flask-cors = "~=6.0"
1514
moto = "~=5.1"
@@ -20,6 +19,7 @@ pyspark = {extras = ["sql"], version = "==3.5.3"}
2019
pytest-postgresql = "~=7.0"
2120
psycopg-binary = "~=3.2"
2221
sqlalchemy = "~=2.0"
22+
duckdb = "==1.4.2"
2323

2424
[dev-packages]
2525
black = "==25.9.0"

Pipfile.lock

Lines changed: 250 additions & 173 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

avro

Submodule avro updated 42 files

avro.patch

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,3 +225,230 @@ index a4d8e9f89..cddea00cc 100644
225225
return 0;
226226
}
227227
}
228+
diff --git a/lang/c/src/avro/io.h b/lang/c/src/avro/io.h
229+
index ffbb68dc5..5bb2ee699 100644
230+
--- a/lang/c/src/avro/io.h
231+
+++ b/lang/c/src/avro/io.h
232+
@@ -109,12 +109,17 @@ int avro_file_writer_create_with_codec(const char *path,
233+
int avro_file_writer_create_with_codec_fp(FILE *fp, const char *path, int should_close,
234+
avro_schema_t schema, avro_file_writer_t * writer,
235+
const char *codec, size_t block_size);
236+
+int avro_file_writer_create_with_codec_metadata_fp(FILE *fp, const char *path, int should_close,
237+
+ avro_schema_t schema, avro_file_writer_t * writer,
238+
+ const char *codec, size_t block_size, avro_value_t *meta);
239+
int avro_file_writer_open(const char *path, avro_file_writer_t * writer);
240+
int avro_file_writer_open_bs(const char *path, avro_file_writer_t * writer, size_t block_size);
241+
int avro_file_reader(const char *path, avro_file_reader_t * reader);
242+
int avro_file_reader_fp(FILE *fp, const char *path, int should_close,
243+
avro_file_reader_t * reader);
244+
245+
+int avro_file_reader_json_schema(const char *file_path, const char **json_schema);
246+
+
247+
avro_schema_t
248+
avro_file_reader_get_writer_schema(avro_file_reader_t reader);
249+
250+
diff --git a/lang/c/src/datafile.c b/lang/c/src/datafile.c
251+
index c9d4dfeb6..a2c9d54b1 100644
252+
--- a/lang/c/src/datafile.c
253+
+++ b/lang/c/src/datafile.c
254+
@@ -73,7 +73,7 @@ static int write_sync(avro_file_writer_t w)
255+
return avro_write(w->writer, w->sync, sizeof(w->sync));
256+
}
257+
258+
-static int write_header(avro_file_writer_t w)
259+
+static int write_header(avro_file_writer_t w, avro_value_t *extra_meta)
260+
{
261+
int rval;
262+
uint8_t version = 1;
263+
@@ -82,6 +82,15 @@ static int write_header(avro_file_writer_t w)
264+
const avro_encoding_t *enc = &avro_binary_encoding;
265+
int64_t schema_len;
266+
267+
+ const char *json_schema = NULL;
268+
+ size_t json_schema_len = 0;
269+
+ if (extra_meta != NULL)
270+
+ {
271+
+ avro_value_t meta_schema = {0};
272+
+ check(rval, avro_value_get_by_name(extra_meta, "avroschema", &meta_schema, NULL));
273+
+ check(rval, avro_value_get_string(&meta_schema, &json_schema, &json_schema_len));
274+
+ }
275+
+
276+
/* Generate random sync */
277+
generate_sync(w);
278+
279+
@@ -94,15 +103,24 @@ static int write_header(avro_file_writer_t w)
280+
check(rval, enc->write_string(w->writer, "avro.schema"));
281+
schema_writer =
282+
avro_writer_memory(&w->schema_buf[0], sizeof(w->schema_buf));
283+
- rval = avro_schema_to_json(w->writers_schema, schema_writer);
284+
+
285+
+ if (json_schema) {
286+
+ rval = avro_write(schema_writer, (char *)json_schema, strlen(json_schema));
287+
+ }
288+
+ else {
289+
+ rval = avro_schema_to_json(w->writers_schema, schema_writer);
290+
+ }
291+
+
292+
if (rval) {
293+
avro_writer_free(schema_writer);
294+
return rval;
295+
}
296+
+
297+
schema_len = avro_writer_tell(schema_writer);
298+
avro_writer_free(schema_writer);
299+
check(rval,
300+
enc->write_bytes(w->writer, w->schema_buf, schema_len));
301+
+
302+
check(rval, enc->write_long(w->writer, 0));
303+
return write_sync(w);
304+
}
305+
@@ -140,7 +158,7 @@ file_writer_init_fp(FILE *fp, const char *path, int should_close, const char *mo
306+
#endif
307+
308+
static int
309+
-file_writer_create(FILE *fp, const char *path, int should_close, avro_schema_t schema, avro_file_writer_t w, size_t block_size)
310+
+file_writer_create(FILE *fp, const char *path, int should_close, avro_schema_t schema, avro_file_writer_t w, size_t block_size, avro_value_t *extra_meta)
311+
{
312+
int rval;
313+
314+
@@ -169,7 +187,7 @@ file_writer_create(FILE *fp, const char *path, int should_close, avro_schema_t s
315+
}
316+
317+
w->writers_schema = avro_schema_incref(schema);
318+
- return write_header(w);
319+
+ return write_header(w, extra_meta);
320+
}
321+
322+
int
323+
@@ -196,6 +214,15 @@ int avro_file_writer_create_with_codec(const char *path,
324+
int avro_file_writer_create_with_codec_fp(FILE *fp, const char *path, int should_close,
325+
avro_schema_t schema, avro_file_writer_t * writer,
326+
const char *codec, size_t block_size)
327+
+{
328+
+ return avro_file_writer_create_with_codec_metadata_fp(fp, path, should_close, schema, writer, codec,
329+
+ block_size, NULL);
330+
+}
331+
+
332+
+
333+
+int avro_file_writer_create_with_codec_metadata_fp(FILE *fp, const char *path, int should_close,
334+
+ avro_schema_t schema, avro_file_writer_t * writer,
335+
+ const char *codec, size_t block_size, avro_value_t *extra_meta)
336+
{
337+
avro_file_writer_t w;
338+
int rval;
339+
@@ -226,7 +253,7 @@ int avro_file_writer_create_with_codec_fp(FILE *fp, const char *path, int should
340+
avro_freet(struct avro_file_writer_t_, w);
341+
return rval;
342+
}
343+
- rval = file_writer_create(fp, path, should_close, schema, w, block_size);
344+
+ rval = file_writer_create(fp, path, should_close, schema, w, block_size, extra_meta);
345+
if (rval) {
346+
avro_codec_reset(w->codec);
347+
avro_freet(struct avro_codec_t_, w->codec);
348+
@@ -541,6 +568,93 @@ int avro_file_reader_fp(FILE *fp, const char *path, int should_close,
349+
return 0;
350+
}
351+
352+
+int
353+
+avro_file_reader_json_schema(const char *file_path, const char **json_schema)
354+
+{
355+
+ FILE *file = fopen(file_path, "rb");
356+
+ if (!file) {
357+
+ avro_set_error("Error opening file: %s",
358+
+ strerror(errno));
359+
+ return errno;
360+
+ }
361+
+
362+
+ avro_reader_t reader = avro_reader_file(file);
363+
+
364+
+ int rval;
365+
+
366+
+ char magic[4] = {0};
367+
+
368+
+ /* read the magic bytes at the start of the file */
369+
+ check(rval, avro_read(reader, magic, sizeof(magic)));
370+
+
371+
+ if (magic[0] != 'O' || magic[1] != 'b' || magic[2] != 'j'
372+
+ || magic[3] != 1)
373+
+ {
374+
+ avro_reader_free(reader);
375+
+ avro_set_error("Incorrect Avro container file magic number");
376+
+ return 1;
377+
+ }
378+
+
379+
+ /* each value is bytes */
380+
+ avro_schema_t meta_values_schema = avro_schema_bytes();
381+
+
382+
+ /* metadata is a map */
383+
+ avro_schema_t meta_schema = avro_schema_map(meta_values_schema);
384+
+
385+
+ /* prepare avro interface for the schema */
386+
+ avro_value_iface_t *meta_iface = avro_generic_class_from_schema(meta_schema);
387+
+
388+
+ if (meta_iface == NULL)
389+
+ {
390+
+ avro_reader_free(reader);
391+
+ avro_set_error("Cannot create metadata interface");
392+
+ return 1;
393+
+ }
394+
+
395+
+ /* read avro metadata */
396+
+ avro_value_t meta;
397+
+
398+
+ if (avro_generic_value_new(meta_iface, &meta) != 0)
399+
+ {
400+
+ avro_reader_free(reader);
401+
+ avro_set_error("Cannot create metadata value");
402+
+ return 1;
403+
+ }
404+
+
405+
+ if (avro_value_read(reader, &meta) != 0)
406+
+ {
407+
+ avro_reader_free(reader);
408+
+ avro_set_error("Cannot read file header");
409+
+ return 1;
410+
+ }
411+
+
412+
+ /* read "avro.schema" from the metadata */
413+
+ avro_value_t schema_bytes;
414+
+
415+
+ if (avro_value_get_by_name(&meta, "avro.schema", &schema_bytes, NULL) != 0)
416+
+ {
417+
+ avro_reader_free(reader);
418+
+ avro_set_error("File header doesn't contain a schema");
419+
+ return 1;
420+
+ }
421+
+
422+
+ const void *p = NULL;
423+
+ size_t len = 0;
424+
+
425+
+ avro_value_get_bytes(&schema_bytes, &p, &len);
426+
+
427+
+ char *schema = avro_malloc(len + 1);
428+
+
429+
+ memcpy((void *) schema, p, len);
430+
+ schema[len] = '\0';
431+
+
432+
+ *json_schema = schema;
433+
+
434+
+ avro_reader_free(reader);
435+
+
436+
+ return 0;
437+
+}
438+
+
439+
int avro_file_reader(const char *path, avro_file_reader_t * reader)
440+
{
441+
FILE *fp;
442+
diff --git a/lang/c/src/schema.c b/lang/c/src/schema.c
443+
index a4d8e9f89..cddea00cc 100644
444+
--- a/lang/c/src/schema.c
445+
+++ b/lang/c/src/schema.c
446+
@@ -56,7 +56,7 @@ static int is_avro_id(const char *name)
447+
}
448+
for (i = 0; i < len; i++) {
449+
if (!(isalpha(name[i])
450+
- || name[i] == '_' || (i && isdigit(name[i])))) {
451+
+ || name[i] == '_' || name[i] == '.' || (i && isdigit(name[i])))) {
452+
return 0;
453+
}
454+
}

duckdb_pglake/Makefile

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ EXT_FLAGS=-DBUILD_EXTENSIONS="tpch;json;icu"
1111
# GitHub actions uses a hash of the Makefile and source
1212
# files as a cache key. When we update the DuckDB version,
1313
# we should update the line below to trigger a new hash.
14-
DUCKDB_VERSION=1.3.2
14+
DUCKDB_VERSION=1.4.2
1515

1616
# We install libduckdb.so into the postgres directory
1717
PG_CONFIG ?= pg_config
@@ -126,21 +126,22 @@ release: patch_duckdb
126126
# patch exit code 1 means the patch is already applied; since this target may run multiple times, tolerate that exit code
127127
patch_duckdb:
128128
cd duckdb && \
129-
find ../patches/duckdb -type f -name '*.patch' -print | xargs -0 printf '%s ' | \
129+
find ../patches/duckdb -type f -name '*.patch' -print | sort | xargs -0 printf '%s ' | \
130130
while read -r patch; do \
131131
patch -l -p1 -N < "$$patch" || [ $$? -eq 1 ]; \
132132
done
133133
cd duckdb-postgres && \
134-
find ../patches/duckdb-postgres -type f -name '*.patch' -print | xargs -0 printf '%s ' | \
134+
find ../patches/duckdb-postgres -type f -name '*.patch' -print | sort | xargs -0 printf '%s ' | \
135135
while read -r patch; do \
136136
patch -l -p1 -N < "$$patch" || [ $$? -eq 1 ]; \
137137
done
138138
cd duckdb-azure && \
139-
find ../patches/duckdb-azure -type f -name '*.patch' -print | xargs -0 printf '%s ' | \
139+
find ../patches/duckdb-azure -type f -name '*.patch' -print | sort | xargs -0 printf '%s ' | \
140140
while read -r patch; do \
141141
patch -l -p1 -N < "$$patch" || [ $$? -eq 1 ]; \
142142
done
143143

144+
144145
format:
145146
find src/ -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i
146147
cmake-format -i CMakeLists.txt

duckdb_pglake/duckdb

Submodule duckdb updated 3980 files

duckdb_pglake/duckdb-postgres

Submodule duckdb-postgres updated 50 files

duckdb_pglake/extension_config.cmake

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
# External extensions to link into libduckdb
22
duckdb_extension_load(httpfs
33
GIT_URL https://github.com/duckdb/duckdb-httpfs
4-
GIT_TAG c93303d2654d1725db3e9f4dbe1c053586f9f3f2
5-
INCLUDE_DIR extension/httpfs/include
4+
GIT_TAG 39ebaf77e93a55b2bb839b621794eba49b2e359b
5+
INCLUDE_DIR src/include
6+
ADD_PATCHES
67
)
78

89
# Extension from this repo

0 commit comments

Comments
 (0)