Skip to content

Commit a502b11

Browse files
committed
CI Errors
1 parent b9b2a3c commit a502b11

File tree

4 files changed

+137
-5
lines changed

4 files changed

+137
-5
lines changed

opteryx/__version__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# THIS FILE IS AUTOMATICALLY UPDATED DURING THE BUILD PROCESS
22
# DO NOT EDIT THIS FILE DIRECTLY
33

4-
__build__ = 1935
4+
__build__ = 1936
55
__author__ = "@joocer"
6-
__version__ = "0.26.2-beta.1935"
6+
__version__ = "0.26.2-beta.1936"
77

88
# Store the version here so:
99
# 1) we don't load dependencies by storing it in __init__.py

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "opteryx"
3-
version = "0.26.2-beta.1935"
3+
version = "0.26.2-beta.1936"
44
description = "Query your data, where it lives"
55
requires-python = '>=3.11'
66
readme = {file = "README.md", content-type = "text/markdown"}

setup.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,10 @@ def get_parquet_vendor_sources():
180180
C_COMPILE_FLAGS = ["-O3"]
181181
if is_mac():
182182
CPP_COMPILE_FLAGS += ["-std=c++17"]
183-
C_COMPILE_FLAGS += ["-std=c17"]
183+
# Use a C standard that is widely supported; avoid passing a C
184+
# language flag that could be accidentally applied to C++ sources
185+
# when the build system compiles mixed-language extensions.
186+
C_COMPILE_FLAGS += ["-std=c11"]
184187
elif is_win():
185188
CPP_COMPILE_FLAGS += ["/std:c++17"]
186189
C_COMPILE_FLAGS += ["/std:c17"]
@@ -360,7 +363,7 @@ def make_draken_extension(module_path, source_file, depends=None, language=None,
360363
sources=[
361364
"opteryx/third_party/tktech/csimdjson.pyx",
362365
"third_party/tktech/simdjson/simdjson.cpp",
363-
"third_party/tktech/simdjson/util.cpp",
366+
"src/cpp/simdjson_error_shim.cpp",
364367
],
365368
include_dirs=include_dirs + ["third_party/tktech/simdjson"],
366369
language="c++",
@@ -761,6 +764,45 @@ def make_draken_extension(module_path, source_file, depends=None, language=None,
761764
ext.language = "c++"
762765
ext.extra_compile_args = CPP_COMPILE_FLAGS
763766

767+
# Ensure extensions that include any C++ source file use the C++
768+
# compile flags; some extensions earlier mistakenly received
769+
# C flags which can include `-std=c17` and break when clang++ is
770+
# used to compile/link the module.
771+
for ext in extensions:
    # Read the source list defensively: an extension whose `sources`
    # attribute cannot be read (or is empty/None) is treated as having none.
    try:
        source_list = ext.sources or []
    except Exception:
        source_list = []
    source_names = [str(item) for item in source_list]

    # Classify the sources. Cython `.pyx` files are counted together with
    # plain `.c` files for the purpose of the mixed-language check below.
    uses_cpp = any(name.endswith((".cpp", ".cxx", ".cc")) for name in source_names)
    uses_c = any(name.endswith((".c", ".pyx")) for name in source_names)

    if uses_cpp and uses_c:
        # Mixed C / C++ extension: `extra_compile_args` is applied to every
        # compilation command, so a C compiler would be handed (and reject)
        # a C++-only standard flag such as `-std=c++17`. Keep every other
        # C++ flag and drop only the language-standard selector.
        ext.extra_compile_args = [
            flag
            for flag in CPP_COMPILE_FLAGS
            if not flag.startswith(("-std=c++", "/std:c++"))
        ]
    elif uses_cpp:
        # Pure C++ extension: use the C++ compile flags unchanged.
        ext.extra_compile_args = CPP_COMPILE_FLAGS
805+
764806
extensions.append(
765807
Extension(
766808
name="opteryx.compiled.list_ops.function_definitions",

src/cpp/simdjson_error_shim.cpp

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Small shim to provide simdjson_error_handler symbol when the vendored
2+
// simdjson/util.cpp is not compiled into this extension. This avoids
3+
// import-time undefined-symbol errors on macOS. The implementation is a
4+
// minimal translator mapping simdjson exceptions to Python exceptions.
5+
6+
// Minimal shim that converts any active C++ exception into a Python exception.
7+
// Avoid including the full simdjson headers here to prevent symbol duplication
8+
// and large in-object code generation; catch std::exception to retrieve
9+
// the message when available.
10+
11+
#define PY_SSIZE_T_CLEAN
12+
#include <Python.h>
13+
#include <exception>
14+
15+
void simdjson_error_handler() {
16+
try {
17+
if (PyErr_Occurred()) {
18+
return; // preserve existing Python exception
19+
} else {
20+
throw; // rethrow the active C++ exception
21+
}
22+
} catch (const std::exception &e) {
23+
const char *msg = e.what();
24+
if (!msg) msg = "simdjson: unknown error";
25+
26+
// Map known simdjson error codes (present in exception message)
27+
// to the appropriate Python exception types. This mirrors the
28+
// behavior in the original vendored `util.cpp` without
29+
// including simdjson headers here (avoids symbol duplication).
30+
if (strstr(msg, "NO_SUCH_FIELD") != NULL) {
31+
PyErr_SetString(PyExc_KeyError, msg);
32+
return;
33+
}
34+
if (strstr(msg, "INDEX_OUT_OF_BOUNDS") != NULL) {
35+
PyErr_SetString(PyExc_IndexError, msg);
36+
return;
37+
}
38+
if (strstr(msg, "INCORRECT_TYPE") != NULL) {
39+
PyErr_SetString(PyExc_TypeError, msg);
40+
return;
41+
}
42+
if (strstr(msg, "MEMALLOC") != NULL) {
43+
PyErr_SetNone(PyExc_MemoryError);
44+
return;
45+
}
46+
47+
// ValueError group
48+
if (strstr(msg, "EMPTY") != NULL || strstr(msg, "STRING_ERROR") != NULL ||
49+
strstr(msg, "T_ATOM_ERROR") != NULL || strstr(msg, "F_ATOM_ERROR") != NULL ||
50+
strstr(msg, "N_ATOM_ERROR") != NULL || strstr(msg, "NUMBER_ERROR") != NULL ||
51+
strstr(msg, "UNESCAPED_CHARS") != NULL || strstr(msg, "UNCLOSED_STRING") != NULL ||
52+
strstr(msg, "NUMBER_OUT_OF_RANGE") != NULL || strstr(msg, "INVALID_JSON_POINTER") != NULL ||
53+
strstr(msg, "INVALID_URI_FRAGMENT") != NULL || strstr(msg, "CAPACITY") != NULL ||
54+
strstr(msg, "TAPE_ERROR") != NULL) {
55+
PyErr_SetString(PyExc_ValueError, msg);
56+
return;
57+
}
58+
59+
if (strstr(msg, "IO_ERROR") != NULL) {
60+
PyErr_SetString(PyExc_IOError, msg);
61+
return;
62+
}
63+
64+
if (strstr(msg, "UTF8_ERROR") != NULL) {
65+
PyObject *unicode_error = PyObject_CallFunction(
66+
PyExc_UnicodeDecodeError,
67+
"sy#nns",
68+
"utf-8",
69+
"",
70+
0,
71+
0,
72+
1,
73+
msg
74+
);
75+
if (unicode_error) {
76+
PyErr_SetObject(PyExc_UnicodeDecodeError, unicode_error);
77+
Py_XDECREF(unicode_error);
78+
} else {
79+
PyErr_SetString(PyExc_UnicodeDecodeError, msg);
80+
}
81+
return;
82+
}
83+
84+
PyErr_SetString(PyExc_RuntimeError, msg);
85+
return;
86+
} catch (...) {
87+
PyErr_SetString(PyExc_RuntimeError, "simdjson: unknown error");
88+
return;
89+
}
90+
}

0 commit comments

Comments
 (0)