@@ -161,6 +161,47 @@ def make_draken_extension(module_path, source_file, language="c++", depends=None
161161 depends = depends ,
162162 )
163163
164+
def get_parquet_vendor_sources():
    """Return the vendored snappy/zstd sources compiled into the parquet extension.

    Only the decompression side of zstd and the minimal snappy sources needed
    for decompression are listed. The vendored code lives inside the project,
    so building it into the extension avoids linking against system libraries
    and the missing-symbol errors that can follow at import time.

    Returns:
        list[str]: Source paths relative to the repository root, snappy files
        first and zstd files after them. On x86_64/amd64 targets other than
        Windows the list also ends with zstd's hand-written Huffman assembly
        file.
    """
    vendor_root = "third_party/mabel/rugo/parquet/vendor"

    # Snappy sources (minimal subset needed to decompress).
    snappy_files = (
        "snappy.cc",
        "snappy-sinksource.cc",
        "snappy-stubs-internal.cc",
    )

    # Zstd sources: shared helpers plus the decompression implementation.
    zstd_files = [
        "common/entropy_common.cpp",
        "common/fse_decompress.cpp",
        "common/zstd_common.cpp",
        "common/xxhash.cpp",
        "common/error_private.cpp",
        "decompress/zstd_decompress.cpp",
        "decompress/zstd_decompress_block.cpp",
        "decompress/huf_decompress.cpp",
        "decompress/zstd_ddict.cpp",
    ]

    # The x86-64 Huffman fast path is GAS-syntax assembly. zstd only enables it
    # for GNU-compatible compilers on Linux/macOS, and MSVC cannot build a
    # `.S` file at all, so it is left out on Windows even when the machine
    # reports amd64.
    # NOTE(review): stock distutils compilers do not list `.S` among their
    # source extensions - confirm the build registers it (or defines
    # ZSTD_DISABLE_ASM) for the platforms where this file is added.
    if detect_architecture() in ("x86_64", "amd64") and not is_win():
        zstd_files.append("decompress/huf_decompress_amd64.S")

    vendor_sources = [f"{vendor_root}/snappy/{name}" for name in snappy_files]
    vendor_sources.extend(f"{vendor_root}/zstd/{name}" for name in zstd_files)
    return vendor_sources
204+
164205# Define all extensions
165206extensions = [
166207
@@ -231,17 +272,28 @@ def make_draken_extension(module_path, source_file, language="c++", depends=None
231272 # File format readers
232273 Extension (
233274 "opteryx.rugo.parquet" ,
234- sources = [
235- "third_party/mabel/rugo/parquet/parquet_reader.pyx" ,
236- "third_party/mabel/rugo/parquet/metadata.cpp" ,
237- "third_party/mabel/rugo/parquet/decode.cpp" ,
238- "third_party/mabel/rugo/parquet/compression.cpp" ,
239- "third_party/mabel/rugo/parquet/bloom_filter.cpp" ,
240- ],
241- include_dirs = include_dirs ,
275+ sources = (
276+ [
277+ "third_party/mabel/rugo/parquet/parquet_reader.pyx" ,
278+ "third_party/mabel/rugo/parquet/metadata.cpp" ,
279+ "third_party/mabel/rugo/parquet/decode.cpp" ,
280+ "third_party/mabel/rugo/parquet/compression.cpp" ,
281+ "third_party/mabel/rugo/parquet/bloom_filter.cpp" ,
282+ ] + get_parquet_vendor_sources ()
283+ ),
284+ include_dirs = (
285+ include_dirs
286+ + [
287+ "third_party/mabel/rugo/parquet/vendor/snappy" ,
288+ "third_party/mabel/rugo/parquet/vendor/zstd" ,
289+ "third_party/mabel/rugo/parquet/vendor/zstd/common" ,
290+ "third_party/mabel/rugo/parquet/vendor/zstd/decompress" ,
291+ ]
292+ ),
293+ define_macros = [("HAVE_SNAPPY" , "1" ), ("HAVE_ZSTD" , "1" ), ("ZSTD_STATIC_LINKING_ONLY" , "1" )],
242294 language = "c++" ,
243295 extra_compile_args = CPP_FLAGS ,
244- libraries = ([] if is_mac () else [ "zstd" , "snappy" ]) ,
296+ extra_link_args = [] ,
245297 ),
246298 Extension (
247299 "opteryx.rugo.jsonl" ,
@@ -463,6 +515,13 @@ def generate_consolidated_module(module_dir, output_file):
463515generate_consolidated_module ("opteryx/compiled/joins" , "opteryx/compiled/joins/joins.pyx" )
464516
465517# Add consolidated modules with their dependencies
# Extra linker flags for the list_ops extension: `-lcrypto` everywhere except
# macOS, and `-pthread` everywhere except Windows.
# NOTE(review): these are GNU-style flags - confirm the Windows toolchain
# accepts `-lcrypto` when passed through extra_link_args.
list_ops_link_args = [
    flag
    for flag, enabled in (
        ("-lcrypto", not is_mac()),
        ("-pthread", not is_win()),
    )
    if enabled
]
524+
466525extensions .extend ([
467526 Extension (
468527 "opteryx.compiled.list_ops.function_definitions" ,
@@ -479,7 +538,7 @@ def generate_consolidated_module(module_dir, output_file):
479538 include_dirs = include_dirs ,
480539 language = "c++" ,
481540 extra_compile_args = CPP_FLAGS ,
482- libraries = ([] if is_mac () else [ "crypto" ]) ,
541+ extra_link_args = list_ops_link_args ,
483542 ),
484543 Extension (
485544 "opteryx.compiled.joins.join_definitions" ,
0 commit comments