From aa81dd1cd43c2ffb2cf6ba388d3ccbf5185d78f8 Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 14 Mar 2025 21:46:34 -0400 Subject: [PATCH 01/35] Allow per-interpreter internals/local_internals --- CMakeLists.txt | 4 ++ include/pybind11/detail/internals.h | 72 ++++++++++++++++++++++++++- include/pybind11/pybind11.h | 62 +++++++++++++++++++++++ tests/test_embed/CMakeLists.txt | 2 + tests/test_embed/external_module.cpp | 2 +- tests/test_embed/test_interpreter.cpp | 25 ++++++++-- 6 files changed, 162 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d570112dd1..bba96fb66e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,6 +92,7 @@ option(PYBIND11_DISABLE_HANDLE_TYPE_NAME_DEFAULT_IMPLEMENTATION "To enforce that a handle_type_name<> specialization exists" OFF) option(PYBIND11_SIMPLE_GIL_MANAGEMENT "Use simpler GIL management logic that does not support disassociation" OFF) +option(PYBIND11_SUBINTERPRETER_SUPPORT "Enable support for sub-interpreters" OFF) set(PYBIND11_INTERNALS_VERSION "" CACHE STRING "Override the ABI version, may be used to enable the unstable ABI.") @@ -103,6 +104,9 @@ endif() if(PYBIND11_SIMPLE_GIL_MANAGEMENT) add_compile_definitions(PYBIND11_SIMPLE_GIL_MANAGEMENT) endif() +if(PYBIND11_SUBINTERPRETER_SUPPORT) + add_compile_definitions(PYBIND11_SUBINTERPRETER_SUPPORT) +endif() cmake_dependent_option( USE_PYTHON_INCLUDE_DIR diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 3687b48744..b1dd286c5c 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -252,7 +252,25 @@ struct type_info { /// Each module locally stores a pointer to the `internals` data. The data /// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`. inline internals **&get_internals_pp() { +#if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON) || PY_VERSION_HEX < 0x030C0000 \ + || !defined(PYBIND11_SUBINTERPRETER_SUPPORT) static internals **internals_pp = nullptr; +#else + static thread_local internals **internals_pp = nullptr; + // This is one per interpreter, we cache it but if the thread changed + // then we need to invalidate our cache + // the caller will find the right value and set it if its null + static thread_local PyThreadState *tstate_cached = nullptr; +# if PY_VERSION_HEX < 0x030D0000 + PyThreadState *tstate = _PyThreadState_UncheckedGet(); +# else + PyThreadState *tstate = PyThreadState_GetUnchecked(); +# endif + if (tstate != tstate_cached) { + tstate_cached = tstate; + internals_pp = nullptr; + } +#endif return internals_pp; } @@ -435,7 +453,12 @@ PYBIND11_NOINLINE internals &get_internals() { // libc++ with CPython doesn't require this (types are explicitly exported) // libc++ with PyPy still need it, awaiting further investigation #if !defined(__GLIBCXX__) - (*internals_pp)->registered_exception_translators.push_front(&translate_local_exception); + if ((*internals_pp)->registered_exception_translators.empty() + || (*internals_pp)->registered_exception_translators.front() + != &translate_local_exception) { + (*internals_pp) + ->registered_exception_translators.push_front(&translate_local_exception); + } #endif } else { if (!internals_pp) { @@ -495,7 +518,54 @@ inline local_internals &get_local_internals() { // static deinitialization fiasco. In order to avoid it we avoid destruction of the // local_internals static. One can read more about the problem and current solution here: // https://google.github.io/styleguide/cppguide.html#Static_and_Global_Variables + +#if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON) || PY_VERSION_HEX < 0x030C0000 \ + || !defined(PYBIND11_SUBINTERPRETER_SUPPORT) static auto *locals = new local_internals(); +#else + static thread_local local_internals *locals = nullptr; + // This is one per interpreter, we cache it but if the interpreter changed + // then we need to invalidate our cache and re-fetch from the state dict + static thread_local PyThreadState *tstate_cached = nullptr; +# if PY_VERSION_HEX < 0x030D0000 + PyThreadState *tstate = _PyThreadState_UncheckedGet(); +# else + PyThreadState *tstate = PyThreadState_GetUnchecked(); +# endif + if (!tstate) { + pybind11_fail( + "pybind11::detail::get_local_internals() called without a current python thread"); + } + if (tstate != tstate_cached) { + // we create a unique value at first run which is based on a pointer to + // a (non-thread_local) static value in this function, then multiple + // loaded modules using this code will still each have a unique key. + static const std::string this_module_idstr + = PYBIND11_MODULE_LOCAL_ID + + std::to_string(reinterpret_cast(&this_module_idstr)); + + // Ensure that the GIL is held since we will need to make Python calls. + // Cannot use py::gil_scoped_acquire here since that constructor calls get_internals. + gil_scoped_acquire_simple gil; + error_scope err_scope; + dict state_dict = get_python_state_dict(); + object local_capsule = reinterpret_steal( + dict_getitemstringref(state_dict.ptr(), this_module_idstr.c_str())); + if (!local_capsule) { + locals = new local_internals(); + state_dict[this_module_idstr.c_str()] = capsule(reinterpret_cast(locals)); + } else { + void *ptr = PyCapsule_GetPointer(local_capsule.ptr(), nullptr); + if (!ptr) { + raise_from(PyExc_SystemError, "pybind11::detail::get_local_internals() FAILED"); + throw error_already_set(); + } + locals = reinterpret_cast(ptr); + } + tstate_cached = tstate; + } +#endif + return *locals; } diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index e05380947c..92a4495594 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1267,6 +1267,26 @@ class mod_gil_not_used { bool flag_; }; +// Use to activate Py_MOD_PER_INTERPRETER_GIL_SUPPORTED +class mod_per_interpreter_gil { +public: + explicit mod_per_interpreter_gil(bool flag = true) : flag_(flag) {} + bool flag() const { return flag_; } + +private: + bool flag_; +}; + +// Use to activate Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED +class mod_multi_interpreter_one_gil { +public: + explicit mod_multi_interpreter_one_gil(bool flag = true) : flag_(flag) {} + bool flag() const { return flag_; } + +private: + bool flag_; +}; + PYBIND11_NAMESPACE_BEGIN(detail) inline bool gil_not_used_option() { return false; } @@ -1281,6 +1301,35 @@ inline bool gil_not_used_option(F &&, O &&...o) { return gil_not_used_option(o...); } +#ifdef Py_mod_multiple_interpreters +inline void *multi_interp_option() { return Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED; } +# ifdef PYBIND11_SUBINTERPRETER_SUPPORT +template +void *multi_interp_option(F &&, O &&...o); +template +void *multi_interp_option(mod_multi_interpreter_one_gil f, O &&...o); +template +inline void *multi_interp_option(mod_per_interpreter_gil f, O &&...o) { + if (f.flag()) { + return Py_MOD_PER_INTERPRETER_GIL_SUPPORTED; + } + return multi_interp_option(o...); +} +template +inline void *multi_interp_option(mod_multi_interpreter_one_gil f, O &&...o) { + void *others = multi_interp_option(o...); + if (!f.flag() || others == Py_MOD_PER_INTERPRETER_GIL_SUPPORTED) { + return others; + } + return Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED; +} +# endif +template +inline void *multi_interp_option(F &&, O &&...o) { + return multi_interp_option(o...); +} +#endif + PYBIND11_NAMESPACE_END(detail) /// Wrapper for Python extension modules @@ -1456,6 +1505,19 @@ class module_ : public object { } bool nogil PYBIND11_MAYBE_UNUSED = detail::gil_not_used_option(options...); + +#ifdef Py_mod_multiple_interpreters + slots[i].slot = Py_mod_multiple_interpreters; + slots[i].value = detail::multi_interp_option(options...); + if (nogil && slots[i].value == Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED) { + // if you support free threading and multi-interpreters, + // then you definitely also support per-interpreter GIL + // even if you don't know it. + slots[i].value = Py_MOD_PER_INTERPRETER_GIL_SUPPORTED; + } + ++i; +#endif + if (nogil) { #if defined(Py_mod_gil) && defined(Py_GIL_DISABLED) if (next_slot >= term_slot) { diff --git a/tests/test_embed/CMakeLists.txt b/tests/test_embed/CMakeLists.txt index 1a537a6580..2b87a0f82b 100644 --- a/tests/test_embed/CMakeLists.txt +++ b/tests/test_embed/CMakeLists.txt @@ -29,6 +29,7 @@ endif() find_package(Threads REQUIRED) add_executable(test_embed catch.cpp test_interpreter.cpp) +target_compile_definitions(test_embed PRIVATE "PYBIND11_SUBINTERPRETER_SUPPORT") pybind11_enable_warnings(test_embed) target_link_libraries(test_embed PRIVATE pybind11::embed Catch2::Catch2 Threads::Threads) @@ -44,6 +45,7 @@ add_custom_target( WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") pybind11_add_module(external_module THIN_LTO external_module.cpp) +target_compile_definitions(external_module PRIVATE "PYBIND11_SUBINTERPRETER_SUPPORT") set_target_properties(external_module PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") foreach(config ${CMAKE_CONFIGURATION_TYPES}) diff --git a/tests/test_embed/external_module.cpp b/tests/test_embed/external_module.cpp index 6564ecbef5..001ae6716e 100644 --- a/tests/test_embed/external_module.cpp +++ b/tests/test_embed/external_module.cpp @@ -6,7 +6,7 @@ namespace py = pybind11; * modules aren't preserved over a finalize/initialize. */ -PYBIND11_MODULE(external_module, m, py::mod_gil_not_used()) { +PYBIND11_MODULE(external_module, m, py::mod_gil_not_used(), py::mod_multi_interpreter_one_gil()) { class A { public: explicit A(int value) : v{value} {}; diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index c6c8a22d98..5dbddcc01b 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -323,6 +323,8 @@ TEST_CASE("Restart the interpreter") { } TEST_CASE("Subinterpreter") { + py::module_::import("external_module"); // in the main interpreter + // Add tags to the modules in the main interpreter and test the basics. py::module_::import("__main__").attr("main_tag") = "main interpreter"; { @@ -338,11 +340,24 @@ TEST_CASE("Subinterpreter") { auto *main_tstate = PyThreadState_Get(); auto *sub_tstate = Py_NewInterpreter(); - // Subinterpreters get their own copy of builtins. detail::get_internals() still - // works by returning from the static variable, i.e. all interpreters share a single - // global pybind11::internals; + // Subinterpreters get their own copy of builtins. REQUIRE_FALSE(has_state_dict_internals_obj()); + +#if defined(PYBIND11_SUBINTERPRETER_SUPPORT) && PY_VERSION_HEX >= 0x030C0000 + // internals hasn't been populated yet, but will be different for the subinterpreter + REQUIRE_FALSE(has_pybind11_internals_static()); + + py::list sys_path = py::module_::import("sys").attr("path"); + sys_path.append(py::str(".")); + + auto ext_int = py::module_::import("external_module").attr("internals_at")().cast(); + py::detail::get_internals(); REQUIRE(has_pybind11_internals_static()); + REQUIRE(reinterpret_cast(*py::detail::get_internals_pp()) == ext_int); +#else + // This static is still defined + REQUIRE(has_pybind11_internals_static()); +#endif // Modules tags should be gone. REQUIRE_FALSE(py::hasattr(py::module_::import("__main__"), "tag")); @@ -354,12 +369,16 @@ TEST_CASE("Subinterpreter") { REQUIRE(m.attr("add")(1, 2).cast() == 3); } + // The subinterpreter now has internals populated since we imported a pybind11 module + REQUIRE(has_pybind11_internals_static()); + // Restore main interpreter. Py_EndInterpreter(sub_tstate); PyThreadState_Swap(main_tstate); REQUIRE(py::hasattr(py::module_::import("__main__"), "main_tag")); REQUIRE(py::hasattr(py::module_::import("widget_module"), "extension_module_tag")); + REQUIRE(has_state_dict_internals_obj()); } TEST_CASE("Execution frame") { From 2bb95158c834b11b6a163343aa309067fb6a44db Mon Sep 17 00:00:00 2001 From: b-pass Date: Thu, 20 Mar 2025 06:50:49 -0400 Subject: [PATCH 02/35] Significant rewrite to avoid using thread_locals as much as possible. Since we can avoid them by checking this atomic, the cmake config conditional shouldn't be necessary. The slower path (with thread_locals and extra checks) only comes in when a second interpreter is actually instanciated. --- CMakeLists.txt | 4 - include/pybind11/detail/common.h | 8 + include/pybind11/detail/internals.h | 215 ++++++++++++--------- include/pybind11/detail/type_caster_base.h | 10 - include/pybind11/embed.h | 28 ++- include/pybind11/pybind11.h | 22 +-- tests/test_embed/CMakeLists.txt | 2 - tests/test_embed/test_interpreter.cpp | 11 +- 8 files changed, 165 insertions(+), 135 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bba96fb66e..d570112dd1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,7 +92,6 @@ option(PYBIND11_DISABLE_HANDLE_TYPE_NAME_DEFAULT_IMPLEMENTATION "To enforce that a handle_type_name<> specialization exists" OFF) option(PYBIND11_SIMPLE_GIL_MANAGEMENT "Use simpler GIL management logic that does not support disassociation" OFF) -option(PYBIND11_SUBINTERPRETER_SUPPORT "Enable support for sub-interpreters" OFF) set(PYBIND11_INTERNALS_VERSION "" CACHE STRING "Override the ABI version, may be used to enable the unstable ABI.") @@ -104,9 +103,6 @@ endif() if(PYBIND11_SIMPLE_GIL_MANAGEMENT) add_compile_definitions(PYBIND11_SIMPLE_GIL_MANAGEMENT) endif() -if(PYBIND11_SUBINTERPRETER_SUPPORT) - add_compile_definitions(PYBIND11_SUBINTERPRETER_SUPPORT) -endif() cmake_dependent_option( USE_PYTHON_INCLUDE_DIR diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index ab6221aff4..93b048fd75 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -232,6 +232,13 @@ # define PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF #endif +// Slightly faster code paths are available when this is NOT defined, so undefine it for impls +// that do not have subinterpreter. Nothing breaks if this is defined but the impl does not +// actually support subinterpreters. +#if PY_VERSION_HEX >= 0x030C0000 && !defined(PYPY_VERSION) && !defined(GRAALVM_PYTHON) +# define PYBIND11_SUBINTERPRETER_SUPPORT +#endif + // #define PYBIND11_STR_LEGACY_PERMISSIVE // If DEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject // (probably surprising and never documented, but this was the @@ -407,6 +414,7 @@ PYBIND11_WARNING_DISABLE_CLANG("-Wgnu-zero-variadic-macro-arguments") return m.ptr(); \ } \ int PYBIND11_CONCAT(pybind11_exec_, name)(PyObject * pm) { \ + pybind11::detail::get_interpreter_count()++; \ try { \ auto m = pybind11::reinterpret_borrow<::pybind11::module_>(pm); \ PYBIND11_CONCAT(pybind11_init_, name)(m); \ diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index b1dd286c5c..141576235f 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -15,6 +15,7 @@ #include "common.h" +#include #include #include #include @@ -249,29 +250,46 @@ struct type_info { "__pybind11_module_local_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) \ PYBIND11_COMPILER_TYPE_LEADING_UNDERSCORE PYBIND11_PLATFORM_ABI_ID "__" +inline PyThreadState *get_thread_state_unchecked() { +#if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON) + return PyThreadState_GET(); +#elif PY_VERSION_HEX < 0x030D0000 + return _PyThreadState_UncheckedGet(); +#else + return PyThreadState_GetUnchecked(); +#endif +} + +/// We use this count to figure out if there are or have been multiple sub-interpreters active at +/// any point. This must never decrease while any interpreter may be running in any thread! +inline std::atomic &get_interpreter_count() { + static std::atomic counter(0); + return counter; +} + /// Each module locally stores a pointer to the `internals` data. The data /// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`. inline internals **&get_internals_pp() { -#if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON) || PY_VERSION_HEX < 0x030C0000 \ - || !defined(PYBIND11_SUBINTERPRETER_SUPPORT) - static internals **internals_pp = nullptr; -#else - static thread_local internals **internals_pp = nullptr; - // This is one per interpreter, we cache it but if the thread changed - // then we need to invalidate our cache - // the caller will find the right value and set it if its null - static thread_local PyThreadState *tstate_cached = nullptr; -# if PY_VERSION_HEX < 0x030D0000 - PyThreadState *tstate = _PyThreadState_UncheckedGet(); -# else - PyThreadState *tstate = PyThreadState_GetUnchecked(); -# endif - if (tstate != tstate_cached) { - tstate_cached = tstate; - internals_pp = nullptr; +#ifdef PYBIND11_SUBINTERPRETER_SUPPORT + if (get_interpreter_count() > 1) { + // Internals is one per interpreter. When multiple interpreters are alive in different + // threads we have to allow them to have different internals, so we need a thread_local. + static thread_local internals **t_internals_pp = nullptr; + // Whenever the interpreter changes we need to invalidate the internals_pp. That is slow, + // so we only do it when the PyThreadState has changed, which indicates the interpreter + // might have changed as well. + static thread_local PyThreadState *tstate_cached = nullptr; + auto *tstate = get_thread_state_unchecked(); + if (tstate != tstate_cached) { + tstate_cached = tstate; + // the caller will fetch the instance from the state dict or create a new one + t_internals_pp = nullptr; + } + return t_internals_pp; } #endif - return internals_pp; + static internals **s_internals_pp = nullptr; + return s_internals_pp; } // forward decl @@ -402,20 +420,6 @@ inline object get_python_state_dict() { return state_dict; } -inline object get_internals_obj_from_state_dict(handle state_dict) { - return reinterpret_steal( - dict_getitemstringref(state_dict.ptr(), PYBIND11_INTERNALS_ID)); -} - -inline internals **get_internals_pp_from_capsule(handle obj) { - void *raw_ptr = PyCapsule_GetPointer(obj.ptr(), /*name=*/nullptr); - if (raw_ptr == nullptr) { - raise_from(PyExc_SystemError, "pybind11::detail::get_internals_pp_from_capsule() FAILED"); - throw error_already_set(); - } - return static_cast(raw_ptr); -} - inline uint64_t round_up_to_next_pow2(uint64_t x) { // Round-up to the next power of two. // See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 @@ -430,9 +434,41 @@ inline uint64_t round_up_to_next_pow2(uint64_t x) { return x; } +#if defined(PYBIND11_SIMPLE_GIL_MANAGEMENT) +using internals_safe_gil_scoped_acquire = gil_scoped_acquire; +#else +// Cannot use py::gil_scoped_acquire inside get_internals since that calls get_internals. +struct internals_safe_gil_scoped_acquire { + internals_safe_gil_scoped_acquire() : state(PyGILState_Ensure()) {} + internals_safe_gil_scoped_acquire(const internals_safe_gil_scoped_acquire &) = delete; + internals_safe_gil_scoped_acquire &operator=(const internals_safe_gil_scoped_acquire &) + = delete; + internals_safe_gil_scoped_acquire(internals_safe_gil_scoped_acquire &&) = delete; + internals_safe_gil_scoped_acquire &operator=(internals_safe_gil_scoped_acquire &&) = delete; + ~internals_safe_gil_scoped_acquire() { PyGILState_Release(state); } + const PyGILState_STATE state; +}; +#endif + +template +inline InternalsType **find_internals_pp(char const *state_dict_key) { + dict state_dict = get_python_state_dict(); + auto internals_obj + = reinterpret_steal(dict_getitemstringref(state_dict.ptr(), state_dict_key)); + if (internals_obj) { + void *raw_ptr = PyCapsule_GetPointer(internals_obj.ptr(), /*name=*/nullptr); + if (!raw_ptr) { + pybind11_fail("find_or_create_internals_pp: broken capsule!"); + } else { + return reinterpret_cast(raw_ptr); + } + } + return nullptr; +} + /// Return a reference to the current `internals` data PYBIND11_NOINLINE internals &get_internals() { - auto **&internals_pp = get_internals_pp(); + auto &internals_pp = get_internals_pp(); if (internals_pp && *internals_pp) { return **internals_pp; } @@ -440,10 +476,8 @@ PYBIND11_NOINLINE internals &get_internals() { gil_scoped_acquire_simple gil; error_scope err_scope; - dict state_dict = get_python_state_dict(); - if (object internals_obj = get_internals_obj_from_state_dict(state_dict)) { - internals_pp = get_internals_pp_from_capsule(internals_obj); - } + internals_pp = find_internals_pp(PYBIND11_INTERNALS_ID); + if (internals_pp && *internals_pp) { // We loaded the internals through `state_dict`, which means that our `error_already_set` // and `builtin_exception` may be different local classes than the ones set up in the @@ -462,8 +496,11 @@ PYBIND11_NOINLINE internals &get_internals() { #endif } else { if (!internals_pp) { - internals_pp = new internals *(); + internals_pp = new internals *(nullptr); + dict state = get_python_state_dict(); + state[PYBIND11_INTERNALS_ID] = capsule(reinterpret_cast(internals_pp)); } + auto *&internals_ptr = *internals_pp; internals_ptr = new internals(); @@ -481,7 +518,6 @@ PYBIND11_NOINLINE internals &get_internals() { } internals_ptr->istate = tstate->interp; - state_dict[PYBIND11_INTERNALS_ID] = capsule(reinterpret_cast(internals_pp)); internals_ptr->registered_exception_translators.push_front(&translate_exception); internals_ptr->static_property_type = make_static_property_type(); internals_ptr->default_metaclass = make_default_metaclass(); @@ -511,62 +547,61 @@ struct local_internals { std::forward_list registered_exception_translators; }; +inline local_internals **&get_local_internals_pp() { +#ifdef PYBIND11_SUBINTERPRETER_SUPPORT + if (get_interpreter_count() > 1) { + // Internals is one per interpreter. When multiple interpreters are alive in different + // threads we have to allow them to have different internals, so we need a thread_local. + static thread_local local_internals **t_internals_pp = nullptr; + // Whenever the interpreter changes we need to invalidate the internals_pp. That is slow, + // so we only do it when the PyThreadState has changed, which indicates the interpreter + // might have changed as well. + static thread_local PyThreadState *tstate_cached = nullptr; + auto *tstate = get_thread_state_unchecked(); + if (tstate != tstate_cached) { + tstate_cached = tstate; + // the caller will fetch the instance from the state dict or create a new one + t_internals_pp = nullptr; + } + return t_internals_pp; + } +#endif + static local_internals **s_internals_pp = nullptr; + return s_internals_pp; +} + +/// A string key uniquely describing this module +inline char const *get_local_internals_id() { + // Use the address of this static itself as part of the key, so that the value is uniquely tied + // to where the module is loaded in memory + static const std::string this_module_idstr + = PYBIND11_MODULE_LOCAL_ID + + std::to_string(reinterpret_cast(&this_module_idstr)); + return this_module_idstr.c_str(); +} + /// Works like `get_internals`, but for things which are locally registered. inline local_internals &get_local_internals() { - // Current static can be created in the interpreter finalization routine. If the later will be - // destroyed in another static variable destructor, creation of this static there will cause - // static deinitialization fiasco. In order to avoid it we avoid destruction of the - // local_internals static. One can read more about the problem and current solution here: - // https://google.github.io/styleguide/cppguide.html#Static_and_Global_Variables - -#if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON) || PY_VERSION_HEX < 0x030C0000 \ - || !defined(PYBIND11_SUBINTERPRETER_SUPPORT) - static auto *locals = new local_internals(); -#else - static thread_local local_internals *locals = nullptr; - // This is one per interpreter, we cache it but if the interpreter changed - // then we need to invalidate our cache and re-fetch from the state dict - static thread_local PyThreadState *tstate_cached = nullptr; -# if PY_VERSION_HEX < 0x030D0000 - PyThreadState *tstate = _PyThreadState_UncheckedGet(); -# else - PyThreadState *tstate = PyThreadState_GetUnchecked(); -# endif - if (!tstate) { - pybind11_fail( - "pybind11::detail::get_local_internals() called without a current python thread"); + auto &local_internals_pp = get_local_internals_pp(); + if (local_internals_pp && *local_internals_pp) { + return **local_internals_pp; } - if (tstate != tstate_cached) { - // we create a unique value at first run which is based on a pointer to - // a (non-thread_local) static value in this function, then multiple - // loaded modules using this code will still each have a unique key. - static const std::string this_module_idstr - = PYBIND11_MODULE_LOCAL_ID - + std::to_string(reinterpret_cast(&this_module_idstr)); - - // Ensure that the GIL is held since we will need to make Python calls. - // Cannot use py::gil_scoped_acquire here since that constructor calls get_internals. - gil_scoped_acquire_simple gil; - error_scope err_scope; - dict state_dict = get_python_state_dict(); - object local_capsule = reinterpret_steal( - dict_getitemstringref(state_dict.ptr(), this_module_idstr.c_str())); - if (!local_capsule) { - locals = new local_internals(); - state_dict[this_module_idstr.c_str()] = capsule(reinterpret_cast(locals)); - } else { - void *ptr = PyCapsule_GetPointer(local_capsule.ptr(), nullptr); - if (!ptr) { - raise_from(PyExc_SystemError, "pybind11::detail::get_local_internals() FAILED"); - throw error_already_set(); - } - locals = reinterpret_cast(ptr); - } - tstate_cached = tstate; + + internals_safe_gil_scoped_acquire gil; + + error_scope err_scope; + + local_internals_pp = find_internals_pp(get_local_internals_id()); + if (!local_internals_pp) { + local_internals_pp = new local_internals *(nullptr); + dict state = get_python_state_dict(); + state[get_local_internals_id()] = capsule(reinterpret_cast(local_internals_pp)); + } + if (!*local_internals_pp) { + *local_internals_pp = new local_internals(); } -#endif - return *locals; + return **local_internals_pp; } #ifdef Py_GIL_DISABLED diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h index 1105c08e7c..fca0232e24 100644 --- a/include/pybind11/detail/type_caster_base.h +++ b/include/pybind11/detail/type_caster_base.h @@ -497,16 +497,6 @@ PYBIND11_NOINLINE handle get_object_handle(const void *ptr, const detail::type_i }); } -inline PyThreadState *get_thread_state_unchecked() { -#if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON) - return PyThreadState_GET(); -#elif PY_VERSION_HEX < 0x030D0000 - return _PyThreadState_UncheckedGet(); -#else - return PyThreadState_GetUnchecked(); -#endif -} - // Forward declarations void keep_alive_impl(handle nurse, handle patient); inline PyObject *make_new_instance(PyTypeObject *type); diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 1b95c32a7f..7385502e9c 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -193,6 +193,9 @@ inline void initialize_interpreter(bool init_signal_handlers = true, config.install_signal_handlers = init_signal_handlers ? 1 : 0; initialize_interpreter(&config, argc, argv, add_program_dir_to_path); #endif + + // There is exactly one interpreter alive currently. + detail::get_interpreter_count() = 1; } /** \rst @@ -234,16 +237,11 @@ inline void finalize_interpreter() { // Get the internals pointer (without creating it if it doesn't exist). It's possible for the // internals to be created during Py_Finalize() (e.g. if a py::capsule calls `get_internals()` // during destruction), so we get the pointer-pointer here and check it after Py_Finalize(). - detail::internals **internals_ptr_ptr = detail::get_internals_pp(); - // It could also be stashed in state_dict, so look there too: - if (object internals_obj - = get_internals_obj_from_state_dict(detail::get_python_state_dict())) { - internals_ptr_ptr = detail::get_internals_pp_from_capsule(internals_obj); - } - // Local internals contains data managed by the current interpreter, so we must clear them to - // avoid undefined behaviors when initializing another interpreter - detail::get_local_internals().registered_types_cpp.clear(); - detail::get_local_internals().registered_exception_translators.clear(); + auto &internals_ptr_ptr = detail::get_internals_pp(); + internals_ptr_ptr = detail::find_internals_pp(PYBIND11_INTERNALS_ID); + auto &local_internals_ptr_ptr = detail::get_local_internals_pp(); + local_internals_ptr_ptr + = detail::find_internals_pp(detail::get_local_internals_id()); Py_Finalize(); @@ -251,6 +249,16 @@ inline void finalize_interpreter() { delete *internals_ptr_ptr; *internals_ptr_ptr = nullptr; } + + // Local internals contains data managed by the current interpreter, so we must clear them to + // avoid undefined behaviors when initializing another interpreter + if (local_internals_ptr_ptr && *local_internals_ptr_ptr) { + delete *local_internals_ptr_ptr; + *local_internals_ptr_ptr = nullptr; + } + + // We know there is no interpreter alive now, so we can reset the count + detail::get_interpreter_count() = 0; } /** \rst diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 92a4495594..bb6a593c92 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1303,7 +1303,6 @@ inline bool gil_not_used_option(F &&, O &&...o) { #ifdef Py_mod_multiple_interpreters inline void *multi_interp_option() { return Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED; } -# ifdef PYBIND11_SUBINTERPRETER_SUPPORT template void *multi_interp_option(F &&, O &&...o); template @@ -1323,7 +1322,6 @@ inline void *multi_interp_option(mod_multi_interpreter_one_gil f, O &&...o) { } return Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED; } -# endif template inline void *multi_interp_option(F &&, O &&...o) { return multi_interp_option(o...); @@ -1481,7 +1479,7 @@ class module_ : public object { /// Must be a POD type, and must hold enough entries for all of the possible slots PLUS ONE for /// the sentinel (0) end slot. - using slots_array = std::array; + using slots_array = std::array; /** \rst Initialized a module def for use with multi-phase module initialization. @@ -1504,21 +1502,15 @@ class module_ : public object { ++next_slot; } - bool nogil PYBIND11_MAYBE_UNUSED = detail::gil_not_used_option(options...); - #ifdef Py_mod_multiple_interpreters - slots[i].slot = Py_mod_multiple_interpreters; - slots[i].value = detail::multi_interp_option(options...); - if (nogil && slots[i].value == Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED) { - // if you support free threading and multi-interpreters, - // then you definitely also support per-interpreter GIL - // even if you don't know it. - slots[i].value = Py_MOD_PER_INTERPRETER_GIL_SUPPORTED; - } - ++i; + if (next_slot >= term_slot) { + pybind11_fail("initialize_multiphase_module_def: not enough space in slots"); + } + slots[next_slot++] + = {Py_mod_multiple_interpreters, detail::multi_interp_option(options...)}; #endif - if (nogil) { + if (detail::gil_not_used_option(options...)) { #if defined(Py_mod_gil) && defined(Py_GIL_DISABLED) if (next_slot >= term_slot) { pybind11_fail("initialize_multiphase_module_def: not enough space in slots"); diff --git a/tests/test_embed/CMakeLists.txt b/tests/test_embed/CMakeLists.txt index 2b87a0f82b..1a537a6580 100644 --- a/tests/test_embed/CMakeLists.txt +++ b/tests/test_embed/CMakeLists.txt @@ -29,7 +29,6 @@ endif() find_package(Threads REQUIRED) add_executable(test_embed catch.cpp test_interpreter.cpp) -target_compile_definitions(test_embed PRIVATE "PYBIND11_SUBINTERPRETER_SUPPORT") pybind11_enable_warnings(test_embed) target_link_libraries(test_embed PRIVATE pybind11::embed Catch2::Catch2 Threads::Threads) @@ -45,7 +44,6 @@ add_custom_target( WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") pybind11_add_module(external_module THIN_LTO external_module.cpp) -target_compile_definitions(external_module PRIVATE "PYBIND11_SUBINTERPRETER_SUPPORT") set_target_properties(external_module PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") foreach(config ${CMAKE_CONFIGURATION_TYPES}) diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index 5dbddcc01b..f5d08e4e64 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -256,8 +256,8 @@ TEST_CASE("Add program dir to path using PyConfig") { #endif bool has_state_dict_internals_obj() { - return bool( - py::detail::get_internals_obj_from_state_dict(py::detail::get_python_state_dict())); + py::dict state = py::detail::get_python_state_dict(); + return state.contains(PYBIND11_INTERNALS_ID); } bool has_pybind11_internals_static() { @@ -308,6 +308,7 @@ TEST_CASE("Restart the interpreter") { REQUIRE_FALSE(ran); py::finalize_interpreter(); REQUIRE(ran); + REQUIRE_FALSE(has_pybind11_internals_static()); py::initialize_interpreter(); REQUIRE_FALSE(has_state_dict_internals_obj()); REQUIRE_FALSE(has_pybind11_internals_static()); @@ -340,6 +341,8 @@ TEST_CASE("Subinterpreter") { auto *main_tstate = PyThreadState_Get(); auto *sub_tstate = Py_NewInterpreter(); + py::detail::get_interpreter_count()++; + // Subinterpreters get their own copy of builtins. REQUIRE_FALSE(has_state_dict_internals_obj()); @@ -347,8 +350,7 @@ TEST_CASE("Subinterpreter") { // internals hasn't been populated yet, but will be different for the subinterpreter REQUIRE_FALSE(has_pybind11_internals_static()); - py::list sys_path = py::module_::import("sys").attr("path"); - sys_path.append(py::str(".")); + py::list(py::module_::import("sys").attr("path")).append(py::str(".")); auto ext_int = py::module_::import("external_module").attr("internals_at")().cast(); py::detail::get_internals(); @@ -374,6 +376,7 @@ TEST_CASE("Subinterpreter") { // Restore main interpreter. Py_EndInterpreter(sub_tstate); + py::detail::get_interpreter_count() = 1; PyThreadState_Swap(main_tstate); REQUIRE(py::hasattr(py::module_::import("__main__"), "main_tag")); From 50f1645e0f1e9a9549ccea5de3271ad34c853475 Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 21 Mar 2025 19:56:47 -0400 Subject: [PATCH 03/35] Add a test for per-interpreter GIL Uses two extra threads to demonstrate that neither shares a GIL. --- include/pybind11/detail/common.h | 2 +- tests/test_embed/external_module.cpp | 6 +- tests/test_embed/test_interpreter.cpp | 206 ++++++++++++++++++++++++++ 3 files changed, 212 insertions(+), 2 deletions(-) diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index 93b048fd75..09f1c0946e 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -233,7 +233,7 @@ #endif // Slightly faster code paths are available when this is NOT defined, so undefine it for impls -// that do not have subinterpreter. Nothing breaks if this is defined but the impl does not +// that do not have subinterpreters. Nothing breaks if this is defined but the impl does not // actually support subinterpreters. #if PY_VERSION_HEX >= 0x030C0000 && !defined(PYPY_VERSION) && !defined(GRAALVM_PYTHON) # define PYBIND11_SUBINTERPRETER_SUPPORT diff --git a/tests/test_embed/external_module.cpp b/tests/test_embed/external_module.cpp index 001ae6716e..d4f82d38c5 100644 --- a/tests/test_embed/external_module.cpp +++ b/tests/test_embed/external_module.cpp @@ -6,7 +6,11 @@ namespace py = pybind11; * modules aren't preserved over a finalize/initialize. */ -PYBIND11_MODULE(external_module, m, py::mod_gil_not_used(), py::mod_multi_interpreter_one_gil()) { +PYBIND11_MODULE(external_module, + m, + py::mod_multi_interpreter_one_gil(), + py::mod_gil_not_used(), + py::mod_per_interpreter_gil()) { class A { public: explicit A(int value) : v{value} {}; diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index f5d08e4e64..7c21c56a1b 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -301,6 +301,8 @@ TEST_CASE("Restart the interpreter") { py::module_::import("__main__").attr("internals_destroy_test") = py::capsule(&ran, [](void *ran) { py::detail::get_internals(); + REQUIRE(has_state_dict_internals_obj()); + REQUIRE(has_pybind11_internals_static()); *static_cast(ran) = true; }); REQUIRE_FALSE(has_state_dict_internals_obj()); @@ -384,6 +386,210 @@ TEST_CASE("Subinterpreter") { REQUIRE(has_state_dict_internals_obj()); } +#if defined(PYBIND11_SUBINTERPRETER_SUPPORT) +TEST_CASE("Multiple Subinterpreters") { + // Make sure the module is in the main interpreter and save its pointer + auto *main_ext = py::module_::import("external_module").ptr(); + auto main_int + = py::module_::import("external_module").attr("internals_at")().cast(); + py::module_::import("external_module").attr("multi_interp") = "1"; + + auto *main_tstate = PyThreadState_Get(); + + /// Create and switch to a subinterpreter. + auto *sub1_tstate = Py_NewInterpreter(); + py::detail::get_interpreter_count()++; + + py::list(py::module_::import("sys").attr("path")).append(py::str(".")); + + // The subinterpreter has its own copy of this module which is completely separate from main + auto *sub1_ext = py::module_::import("external_module").ptr(); + REQUIRE(sub1_ext != main_ext); + REQUIRE_FALSE(py::hasattr(py::module_::import("external_module"), "multi_interp")); + py::module_::import("external_module").attr("multi_interp") = "2"; + // The sub-interpreter also has its own internals + auto sub1_int + = py::module_::import("external_module").attr("internals_at")().cast(); + REQUIRE(sub1_int != main_int); + + // Create another interpreter + auto *sub2_tstate = Py_NewInterpreter(); + py::detail::get_interpreter_count()++; + + py::list(py::module_::import("sys").attr("path")).append(py::str(".")); + + // The second subinterpreter is separate from both main and the other sub-interpreter + auto *sub2_ext = py::module_::import("external_module").ptr(); + REQUIRE(sub2_ext != main_ext); + REQUIRE(sub2_ext != sub1_ext); + REQUIRE_FALSE(py::hasattr(py::module_::import("external_module"), "multi_interp")); + py::module_::import("external_module").attr("multi_interp") = "3"; + // The sub-interpreter also has its own internals + auto sub2_int + = py::module_::import("external_module").attr("internals_at")().cast(); + REQUIRE(sub2_int != main_int); + REQUIRE(sub2_int != sub1_int); + + PyThreadState_Swap(sub1_tstate); // go back to sub1 + + REQUIRE(py::cast(py::module_::import("external_module").attr("multi_interp")) + == "2"); + + PyThreadState_Swap(main_tstate); // go back to main + + auto post_int + = py::module_::import("external_module").attr("internals_at")().cast(); + // Make sure internals went back the way it was before + REQUIRE(main_int == post_int); + + REQUIRE(py::cast(py::module_::import("external_module").attr("multi_interp")) + == "1"); + + PyThreadState_Swap(sub1_tstate); + Py_EndInterpreter(sub1_tstate); + PyThreadState_Swap(sub2_tstate); + Py_EndInterpreter(sub2_tstate); + + py::detail::get_interpreter_count() = 1; + PyThreadState_Swap(main_tstate); +} +#endif + +#if defined(Py_MOD_PER_INTERPRETER_GIL_SUPPORTED) && defined(PYBIND11_SUBINTERPRETER_SUPPORT) +TEST_CASE("Per-Subinterpreter GIL") { + auto main_int + = py::module_::import("external_module").attr("internals_at")().cast(); + + std::atomic started = 0, finished = 0, failure = 0, sync = 0; + +// REQUIRE throws on failure, so we can't use it within the thread +# define T_REQUIRE(status) \ + do { \ + assert(status); \ + if (!(status)) \ + ++failure; \ + } while (0) + + auto &&thread_main = [&](int num) { + while (started == 0) + std::this_thread::sleep_for(std::chrono::microseconds(1)); + ++started; + + py::gil_scoped_acquire gil; + auto main_tstate = PyThreadState_Get(); + + // we have the GIL, we can access the main interpreter + auto t_int + = py::module_::import("external_module").attr("internals_at")().cast(); + T_REQUIRE(t_int == main_int); + py::module_::import("external_module").attr("multi_interp") = "1"; + + PyInterpreterConfig cfg = _PyInterpreterConfig_INIT; + PyThreadState *sub = nullptr; + auto status = Py_NewInterpreterFromConfig(&sub, &cfg); + T_REQUIRE(!PyStatus_IsError(status)); + + py::detail::get_interpreter_count()++; + + py::list(py::module_::import("sys").attr("path")).append(py::str(".")); + + // we have switched to the new interpreter and released the main gil + + // widget_module did not provide the mod_per_interpreter_gil tag, so it cannot be imported + bool caught = false; + try { + py::module_::import("widget_module"); + } catch (pybind11::error_already_set &pe) { + T_REQUIRE(pe.matches(PyExc_ImportError)); + std::string msg(pe.what()); + T_REQUIRE(msg.find("does not support loading in subinterpreters") + != std::string::npos); + caught = true; + } + T_REQUIRE(caught); + + T_REQUIRE(!py::hasattr(py::module_::import("external_module"), "multi_interp")); + py::module_::import("external_module").attr("multi_interp") = std::to_string(num); + + // wait for something to set sync to our thread number + // we are holding our subinterpreter's GIL + while (sync != num) + std::this_thread::sleep_for(std::chrono::microseconds(1)); + + // now change it so the next thread can mvoe on + ++sync; + + // but keep holding the GIL until after the next thread moves on as well + while (sync == num + 1) + std::this_thread::sleep_for(std::chrono::microseconds(1)); + + // one last check before quitting the thread, the internals should be different + auto sub_int + = py::module_::import("external_module").attr("internals_at")().cast(); + T_REQUIRE(sub_int != main_int); + + Py_EndInterpreter(sub); + + ++finished; + + PyThreadState_Swap( + main_tstate); // switch back so the scoped_acquire can release the GIL properly + }; + + std::thread(thread_main, 1).detach(); + std::thread(thread_main, 2).detach(); + + // we spawned two threads, at this point they are both waiting for started to increase + ++started; + + // ok now wait for the threads to start + while (started != 3) + std::this_thread::sleep_for(std::chrono::microseconds(1)); + + // we still hold the main GIL, at this point both threads are waiting on the main GIL + // IN THE CASE of free threading, the threads are waiting on sync (because there is no GIL) + + // IF the below code hangs in one of the wait loops, then the child thread GIL behavior did not + // function as expected. + { + // release the GIL and allow the threads to run + py::gil_scoped_release nogil; + + // the threads are now waiting on the sync + REQUIRE(sync == 0); + + // this will trigger thread 1 and then advance and trigger 2 and then advance + sync = 1; + + // wait for thread 2 to advance + while (sync != 3) + std::this_thread::sleep_for(std::chrono::microseconds(1)); + + // we know now that thread 1 has run and may be finishing + // and thread 2 is waiting for permission to advance + + // so we move sync so that thread 2 can finish executing + ++sync; + + ++finished; + + // now wait for both threads to complete + while (finished != 3) + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + // now we have the gil again, sanity check + REQUIRE(py::cast(py::module_::import("external_module").attr("multi_interp")) + == "1"); + + // the threads are stopped. we can now lower this for the rest of the test + py::detail::get_interpreter_count() = 1; + + // make sure nothing unexpected happened inside the threads, now that they are completed + REQUIRE(failure == 0); +} +#endif + TEST_CASE("Execution frame") { // When the interpreter is embedded, there is no execution frame, but `py::exec` // should still function by using reasonable globals: `__main__.__dict__`. From 283602bb585dde3a080cdafc2235f7a202490f40 Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 21 Mar 2025 21:30:43 -0400 Subject: [PATCH 04/35] Fix for nonconforming std::atomic constructors on some compilers --- tests/test_embed/test_interpreter.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index 7c21c56a1b..08b647365d 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -460,7 +460,11 @@ TEST_CASE("Per-Subinterpreter GIL") { auto main_int = py::module_::import("external_module").attr("internals_at")().cast(); - std::atomic started = 0, finished = 0, failure = 0, sync = 0; + std::atomic started, sync, finished, failure; + started = 0; + sync = 0; + finished = 0; + failure = 0; // REQUIRE throws on failure, so we can't use it within the thread # define T_REQUIRE(status) \ @@ -587,6 +591,7 @@ TEST_CASE("Per-Subinterpreter GIL") { // make sure nothing unexpected happened inside the threads, now that they are completed REQUIRE(failure == 0); +#undef T_REQUIRE } #endif From 83135080945c05a3f222500d7645ee9037924ae4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 22 Mar 2025 01:31:26 +0000 Subject: [PATCH 05/35] style: pre-commit fixes --- tests/test_embed/test_interpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index 08b647365d..3ae980fc50 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -591,7 +591,7 @@ TEST_CASE("Per-Subinterpreter GIL") { // make sure nothing unexpected happened inside the threads, now that they are completed REQUIRE(failure == 0); -#undef T_REQUIRE +# undef T_REQUIRE } #endif From dfd53ec169e8c076c0465b056649a4ab89253159 Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 21 Mar 2025 21:49:46 -0400 Subject: [PATCH 06/35] Fix initializer to make MSVC happy. --- include/pybind11/detail/common.h | 2 +- tests/test_embed/test_interpreter.cpp | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index 09f1c0946e..aa40dcc08e 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -232,7 +232,7 @@ # define PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF #endif -// Slightly faster code paths are available when this is NOT defined, so undefine it for impls +// Slightly faster code paths are available when this is NOT defined, so don't it for impls // that do not have subinterpreters. Nothing breaks if this is defined but the impl does not // actually support subinterpreters. #if PY_VERSION_HEX >= 0x030C0000 && !defined(PYPY_VERSION) && !defined(GRAALVM_PYTHON) diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index 3ae980fc50..dcb7e17eb6 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -488,8 +488,11 @@ TEST_CASE("Per-Subinterpreter GIL") { T_REQUIRE(t_int == main_int); py::module_::import("external_module").attr("multi_interp") = "1"; - PyInterpreterConfig cfg = _PyInterpreterConfig_INIT; PyThreadState *sub = nullptr; + PyInterpreterConfig cfg; + memset(&cfg, 0, sizeof(cfg)); + cfg.check_multi_interp_extensions = 1; + cfg.gil = PyInterpreterConfig_OWN_GIL; auto status = Py_NewInterpreterFromConfig(&sub, &cfg); T_REQUIRE(!PyStatus_IsError(status)); From 193f93c4f86977aecdd462e51d19b97ac71b2336 Mon Sep 17 00:00:00 2001 From: b-pass Date: Mon, 14 Apr 2025 19:24:56 -0400 Subject: [PATCH 07/35] Switch to gil_scoped_acquire_simple, get rid of old copy of it from internals.h --- include/pybind11/detail/internals.h | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 141576235f..303c37d542 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -434,22 +434,6 @@ inline uint64_t round_up_to_next_pow2(uint64_t x) { return x; } -#if defined(PYBIND11_SIMPLE_GIL_MANAGEMENT) -using internals_safe_gil_scoped_acquire = gil_scoped_acquire; -#else -// Cannot use py::gil_scoped_acquire inside get_internals since that calls get_internals. -struct internals_safe_gil_scoped_acquire { - internals_safe_gil_scoped_acquire() : state(PyGILState_Ensure()) {} - internals_safe_gil_scoped_acquire(const internals_safe_gil_scoped_acquire &) = delete; - internals_safe_gil_scoped_acquire &operator=(const internals_safe_gil_scoped_acquire &) - = delete; - internals_safe_gil_scoped_acquire(internals_safe_gil_scoped_acquire &&) = delete; - internals_safe_gil_scoped_acquire &operator=(internals_safe_gil_scoped_acquire &&) = delete; - ~internals_safe_gil_scoped_acquire() { PyGILState_Release(state); } - const PyGILState_STATE state; -}; -#endif - template inline InternalsType **find_internals_pp(char const *state_dict_key) { dict state_dict = get_python_state_dict(); @@ -473,6 +457,7 @@ PYBIND11_NOINLINE internals &get_internals() { return **internals_pp; } + // Cannot use py::gil_scoped_acquire inside get_internals since that calls get_internals. gil_scoped_acquire_simple gil; error_scope err_scope; @@ -587,8 +572,8 @@ inline local_internals &get_local_internals() { return **local_internals_pp; } - internals_safe_gil_scoped_acquire gil; - + // Cannot use py::gil_scoped_acquire inside get_internals since that calls get_internals. + gil_scoped_acquire_simple gil; error_scope err_scope; local_internals_pp = find_internals_pp(get_local_internals_id()); From 981ea3738cb81d46f4abe4451e0dc1b5f04017cb Mon Sep 17 00:00:00 2001 From: b-pass Date: Mon, 14 Apr 2025 19:38:39 -0400 Subject: [PATCH 08/35] Use the PyThreadState's interp member rather than the thread state itself. --- include/pybind11/detail/internals.h | 34 +++++++++++++++++------------ 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 303c37d542..dc29211005 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -275,14 +275,17 @@ inline internals **&get_internals_pp() { // Internals is one per interpreter. When multiple interpreters are alive in different // threads we have to allow them to have different internals, so we need a thread_local. static thread_local internals **t_internals_pp = nullptr; - // Whenever the interpreter changes we need to invalidate the internals_pp. That is slow, - // so we only do it when the PyThreadState has changed, which indicates the interpreter - // might have changed as well. - static thread_local PyThreadState *tstate_cached = nullptr; + static thread_local PyInterpreterState *istate_cached = nullptr; + // Whenever the interpreter changes on the current thread we need to invalidate the + // internals_pp so that it can be pulled from the interpreter's state dict. That is slow, + // so we use the current PyThreadState to check if it is necessary. The caller will see a + // null return and do the fetch from the state dict or create a new one (as needed). auto *tstate = get_thread_state_unchecked(); - if (tstate != tstate_cached) { - tstate_cached = tstate; - // the caller will fetch the instance from the state dict or create a new one + if (!tstate) { + istate_cached = nullptr; + t_internals_pp = nullptr; + } else if (tstate->interp != istate_cached) { + istate_cached = tstate->interp; t_internals_pp = nullptr; } return t_internals_pp; @@ -538,14 +541,17 @@ inline local_internals **&get_local_internals_pp() { // Internals is one per interpreter. When multiple interpreters are alive in different // threads we have to allow them to have different internals, so we need a thread_local. static thread_local local_internals **t_internals_pp = nullptr; - // Whenever the interpreter changes we need to invalidate the internals_pp. That is slow, - // so we only do it when the PyThreadState has changed, which indicates the interpreter - // might have changed as well. - static thread_local PyThreadState *tstate_cached = nullptr; + static thread_local PyInterpreterState *istate_cached = nullptr; + // Whenever the interpreter changes on the current thread we need to invalidate the + // internals_pp so that it can be pulled from the interpreter's state dict. That is slow, + // so we use the current PyThreadState to check if it is necessary. The caller will see a + // null return and do the fetch from the state dict or create a new one (as needed). auto *tstate = get_thread_state_unchecked(); - if (tstate != tstate_cached) { - tstate_cached = tstate; - // the caller will fetch the instance from the state dict or create a new one + if (!tstate) { + istate_cached = nullptr; + t_internals_pp = nullptr; + } else if (tstate->interp != istate_cached) { + istate_cached = tstate->interp; t_internals_pp = nullptr; } return t_internals_pp; From a547a1a586237db826b648fcd4382a87a790e7f1 Mon Sep 17 00:00:00 2001 From: b-pass Date: Mon, 14 Apr 2025 19:40:20 -0400 Subject: [PATCH 09/35] Be more explicit about the type of the internalspp --- include/pybind11/detail/internals.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index dc29211005..a2fea044be 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -455,7 +455,7 @@ inline InternalsType **find_internals_pp(char const *state_dict_key) { /// Return a reference to the current `internals` data PYBIND11_NOINLINE internals &get_internals() { - auto &internals_pp = get_internals_pp(); + auto **&internals_pp = get_internals_pp(); if (internals_pp && *internals_pp) { return **internals_pp; } @@ -573,7 +573,7 @@ inline char const *get_local_internals_id() { /// Works like `get_internals`, but for things which are locally registered. inline local_internals &get_local_internals() { - auto &local_internals_pp = get_local_internals_pp(); + auto **&local_internals_pp = get_local_internals_pp(); if (local_internals_pp && *local_internals_pp) { return **local_internals_pp; } From 0707fa47e6e810f0acb41586b0f6914f80c99f62 Mon Sep 17 00:00:00 2001 From: b-pass Date: Mon, 14 Apr 2025 19:41:21 -0400 Subject: [PATCH 10/35] Suggested renamings and rewordings --- include/pybind11/detail/common.h | 8 ++++---- include/pybind11/detail/internals.h | 8 ++++---- include/pybind11/embed.h | 4 ++-- tests/test_embed/test_interpreter.cpp | 20 ++++++++++---------- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index aa40dcc08e..49a7c185c2 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -232,9 +232,9 @@ # define PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF #endif -// Slightly faster code paths are available when this is NOT defined, so don't it for impls -// that do not have subinterpreters. Nothing breaks if this is defined but the impl does not -// actually support subinterpreters. +// Slightly faster code paths are available when PYBIND11_SUBINTERPRETER_SUPPORT is *not* defined, +// so avoid defining it for implementations that do not support subinterpreters. +// However, defining it unnecessarily is not expected to break anything. #if PY_VERSION_HEX >= 0x030C0000 && !defined(PYPY_VERSION) && !defined(GRAALVM_PYTHON) # define PYBIND11_SUBINTERPRETER_SUPPORT #endif @@ -414,7 +414,7 @@ PYBIND11_WARNING_DISABLE_CLANG("-Wgnu-zero-variadic-macro-arguments") return m.ptr(); \ } \ int PYBIND11_CONCAT(pybind11_exec_, name)(PyObject * pm) { \ - pybind11::detail::get_interpreter_count()++; \ + pybind11::detail::get_interpreter_counter()++; \ try { \ auto m = pybind11::reinterpret_borrow<::pybind11::module_>(pm); \ PYBIND11_CONCAT(pybind11_init_, name)(m); \ diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index a2fea044be..621f707aab 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -260,9 +260,9 @@ inline PyThreadState *get_thread_state_unchecked() { #endif } -/// We use this count to figure out if there are or have been multiple sub-interpreters active at +/// We use this counter to figure out if there are or have been multiple subinterpreters active at /// any point. This must never decrease while any interpreter may be running in any thread! -inline std::atomic &get_interpreter_count() { +inline std::atomic &get_interpreter_counter() { static std::atomic counter(0); return counter; } @@ -271,7 +271,7 @@ inline std::atomic &get_interpreter_count() { /// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`. inline internals **&get_internals_pp() { #ifdef PYBIND11_SUBINTERPRETER_SUPPORT - if (get_interpreter_count() > 1) { + if (get_interpreter_counter() > 1) { // Internals is one per interpreter. When multiple interpreters are alive in different // threads we have to allow them to have different internals, so we need a thread_local. static thread_local internals **t_internals_pp = nullptr; @@ -537,7 +537,7 @@ struct local_internals { inline local_internals **&get_local_internals_pp() { #ifdef PYBIND11_SUBINTERPRETER_SUPPORT - if (get_interpreter_count() > 1) { + if (get_interpreter_counter() > 1) { // Internals is one per interpreter. When multiple interpreters are alive in different // threads we have to allow them to have different internals, so we need a thread_local. static thread_local local_internals **t_internals_pp = nullptr; diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 7385502e9c..7ba60d23b5 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -195,7 +195,7 @@ inline void initialize_interpreter(bool init_signal_handlers = true, #endif // There is exactly one interpreter alive currently. - detail::get_interpreter_count() = 1; + detail::get_interpreter_counter() = 1; } /** \rst @@ -258,7 +258,7 @@ inline void finalize_interpreter() { } // We know there is no interpreter alive now, so we can reset the count - detail::get_interpreter_count() = 0; + detail::get_interpreter_counter() = 0; } /** \rst diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index dcb7e17eb6..7be18b7065 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -343,7 +343,7 @@ TEST_CASE("Subinterpreter") { auto *main_tstate = PyThreadState_Get(); auto *sub_tstate = Py_NewInterpreter(); - py::detail::get_interpreter_count()++; + py::detail::get_interpreter_counter()++; // Subinterpreters get their own copy of builtins. REQUIRE_FALSE(has_state_dict_internals_obj()); @@ -378,7 +378,7 @@ TEST_CASE("Subinterpreter") { // Restore main interpreter. Py_EndInterpreter(sub_tstate); - py::detail::get_interpreter_count() = 1; + py::detail::get_interpreter_counter() = 1; PyThreadState_Swap(main_tstate); REQUIRE(py::hasattr(py::module_::import("__main__"), "main_tag")); @@ -398,7 +398,7 @@ TEST_CASE("Multiple Subinterpreters") { /// Create and switch to a subinterpreter. auto *sub1_tstate = Py_NewInterpreter(); - py::detail::get_interpreter_count()++; + py::detail::get_interpreter_counter()++; py::list(py::module_::import("sys").attr("path")).append(py::str(".")); @@ -407,24 +407,24 @@ TEST_CASE("Multiple Subinterpreters") { REQUIRE(sub1_ext != main_ext); REQUIRE_FALSE(py::hasattr(py::module_::import("external_module"), "multi_interp")); py::module_::import("external_module").attr("multi_interp") = "2"; - // The sub-interpreter also has its own internals + // The subinterpreter also has its own internals auto sub1_int = py::module_::import("external_module").attr("internals_at")().cast(); REQUIRE(sub1_int != main_int); // Create another interpreter auto *sub2_tstate = Py_NewInterpreter(); - py::detail::get_interpreter_count()++; + py::detail::get_interpreter_counter()++; py::list(py::module_::import("sys").attr("path")).append(py::str(".")); - // The second subinterpreter is separate from both main and the other sub-interpreter + // The second subinterpreter is separate from both main and the other subinterpreter auto *sub2_ext = py::module_::import("external_module").ptr(); REQUIRE(sub2_ext != main_ext); REQUIRE(sub2_ext != sub1_ext); REQUIRE_FALSE(py::hasattr(py::module_::import("external_module"), "multi_interp")); py::module_::import("external_module").attr("multi_interp") = "3"; - // The sub-interpreter also has its own internals + // The subinterpreter also has its own internals auto sub2_int = py::module_::import("external_module").attr("internals_at")().cast(); REQUIRE(sub2_int != main_int); @@ -450,7 +450,7 @@ TEST_CASE("Multiple Subinterpreters") { PyThreadState_Swap(sub2_tstate); Py_EndInterpreter(sub2_tstate); - py::detail::get_interpreter_count() = 1; + py::detail::get_interpreter_counter() = 1; PyThreadState_Swap(main_tstate); } #endif @@ -496,7 +496,7 @@ TEST_CASE("Per-Subinterpreter GIL") { auto status = Py_NewInterpreterFromConfig(&sub, &cfg); T_REQUIRE(!PyStatus_IsError(status)); - py::detail::get_interpreter_count()++; + py::detail::get_interpreter_counter()++; py::list(py::module_::import("sys").attr("path")).append(py::str(".")); @@ -590,7 +590,7 @@ TEST_CASE("Per-Subinterpreter GIL") { == "1"); // the threads are stopped. we can now lower this for the rest of the test - py::detail::get_interpreter_count() = 1; + py::detail::get_interpreter_counter() = 1; // make sure nothing unexpected happened inside the threads, now that they are completed REQUIRE(failure == 0); From e6ff3c4ecd23a1278970682be4f40888d3aceb9e Mon Sep 17 00:00:00 2001 From: b-pass Date: Mon, 14 Apr 2025 19:56:02 -0400 Subject: [PATCH 11/35] Rename find_internals_pp and change it to take in the state dict reference --- include/pybind11/detail/common.h | 2 +- include/pybind11/detail/internals.h | 19 +++++++++++-------- include/pybind11/embed.h | 15 ++++++++++----- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index 49a7c185c2..7bc937b864 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -414,7 +414,7 @@ PYBIND11_WARNING_DISABLE_CLANG("-Wgnu-zero-variadic-macro-arguments") return m.ptr(); \ } \ int PYBIND11_CONCAT(pybind11_exec_, name)(PyObject * pm) { \ - pybind11::detail::get_interpreter_counter()++; \ + pybind11::detail::get_interpreter_counter()++; \ try { \ auto m = pybind11::reinterpret_borrow<::pybind11::module_>(pm); \ PYBIND11_CONCAT(pybind11_init_, name)(m); \ diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 621f707aab..7608a9f705 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -438,8 +438,8 @@ inline uint64_t round_up_to_next_pow2(uint64_t x) { } template -inline InternalsType **find_internals_pp(char const *state_dict_key) { - dict state_dict = get_python_state_dict(); +inline InternalsType **get_internals_pp_from_capsule_in_state_dict(dict &state_dict, + char const *state_dict_key) { auto internals_obj = reinterpret_steal(dict_getitemstringref(state_dict.ptr(), state_dict_key)); if (internals_obj) { @@ -464,7 +464,9 @@ PYBIND11_NOINLINE internals &get_internals() { gil_scoped_acquire_simple gil; error_scope err_scope; - internals_pp = find_internals_pp(PYBIND11_INTERNALS_ID); + dict state_dict = get_python_state_dict(); + internals_pp = get_internals_pp_from_capsule_in_state_dict(state_dict, + PYBIND11_INTERNALS_ID); if (internals_pp && *internals_pp) { // We loaded the internals through `state_dict`, which means that our `error_already_set` @@ -485,8 +487,7 @@ PYBIND11_NOINLINE internals &get_internals() { } else { if (!internals_pp) { internals_pp = new internals *(nullptr); - dict state = get_python_state_dict(); - state[PYBIND11_INTERNALS_ID] = capsule(reinterpret_cast(internals_pp)); + state_dict[PYBIND11_INTERNALS_ID] = capsule(reinterpret_cast(internals_pp)); } auto *&internals_ptr = *internals_pp; @@ -582,11 +583,13 @@ inline local_internals &get_local_internals() { gil_scoped_acquire_simple gil; error_scope err_scope; - local_internals_pp = find_internals_pp(get_local_internals_id()); + dict state_dict = get_python_state_dict(); + local_internals_pp = get_internals_pp_from_capsule_in_state_dict( + state_dict, get_local_internals_id()); if (!local_internals_pp) { local_internals_pp = new local_internals *(nullptr); - dict state = get_python_state_dict(); - state[get_local_internals_id()] = capsule(reinterpret_cast(local_internals_pp)); + state_dict[get_local_internals_id()] + = capsule(reinterpret_cast(local_internals_pp)); } if (!*local_internals_pp) { *local_internals_pp = new local_internals(); diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 7ba60d23b5..9d964edee2 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -237,11 +237,16 @@ inline void finalize_interpreter() { // Get the internals pointer (without creating it if it doesn't exist). It's possible for the // internals to be created during Py_Finalize() (e.g. if a py::capsule calls `get_internals()` // during destruction), so we get the pointer-pointer here and check it after Py_Finalize(). - auto &internals_ptr_ptr = detail::get_internals_pp(); - internals_ptr_ptr = detail::find_internals_pp(PYBIND11_INTERNALS_ID); - auto &local_internals_ptr_ptr = detail::get_local_internals_pp(); - local_internals_ptr_ptr - = detail::find_internals_pp(detail::get_local_internals_id()); + auto **&internals_ptr_ptr = detail::get_internals_pp(); + auto **&local_internals_ptr_ptr = detail::get_local_internals_pp(); + { + dict state_dict = detail::get_python_state_dict(); + internals_ptr_ptr = detail::get_internals_pp_from_capsule_in_state_dict( + state_dict, PYBIND11_INTERNALS_ID); + local_internals_ptr_ptr + = detail::get_internals_pp_from_capsule_in_state_dict( + state_dict, detail::get_local_internals_id()); + } Py_Finalize(); From 6add93a10053469d0046c1faf90f91b9c16d1346 Mon Sep 17 00:00:00 2001 From: b-pass Date: Mon, 14 Apr 2025 20:09:45 -0400 Subject: [PATCH 12/35] Use the old raise_from instead of pybind11_fail --- include/pybind11/detail/internals.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 7608a9f705..faed066e28 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -445,10 +445,11 @@ inline InternalsType **get_internals_pp_from_capsule_in_state_dict(dict &state_d if (internals_obj) { void *raw_ptr = PyCapsule_GetPointer(internals_obj.ptr(), /*name=*/nullptr); if (!raw_ptr) { - pybind11_fail("find_or_create_internals_pp: broken capsule!"); - } else { - return reinterpret_cast(raw_ptr); + raise_from(PyExc_SystemError, + "pybind11::detail::get_internals_pp_from_capsule_in_state_dict() FAILED"); + throw error_already_set(); } + return reinterpret_cast(raw_ptr); } return nullptr; } From 80df0a7267d9fb704b03253e6e2d09352f8cc56c Mon Sep 17 00:00:00 2001 From: b-pass Date: Mon, 14 Apr 2025 21:22:28 -0400 Subject: [PATCH 13/35] Move most of the internals initialization into its constructor. --- include/pybind11/detail/internals.h | 86 ++++++++++++++++------------- 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index faed066e28..01bc946606 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -54,6 +54,7 @@ constexpr const char *internals_function_record_capsule_name = "pybind11_functio inline PyTypeObject *make_static_property_type(); inline PyTypeObject *make_default_metaclass(); inline PyObject *make_object_base_type(PyTypeObject *metaclass); +inline void translate_exception(std::exception_ptr p); // The old Python Thread Local Storage (TLS) API is deprecated in Python 3.7 in favor of the new // Thread Specific Storage (TSS) API. @@ -179,9 +180,9 @@ struct internals { // extensions std::forward_list static_strings; // Stores the std::strings backing // detail::c_str() - PyTypeObject *static_property_type; - PyTypeObject *default_metaclass; - PyObject *instance_base; + PyTypeObject *static_property_type = nullptr; + PyTypeObject *default_metaclass = nullptr; + PyObject *instance_base = nullptr; // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined: PYBIND11_TLS_KEY_INIT(tstate) PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key) @@ -190,7 +191,36 @@ struct internals { type_map native_enum_type_map; - internals() = default; + internals() { + PyThreadState *curtstate = PyThreadState_Get(); + // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) + if (!PYBIND11_TLS_KEY_CREATE(tstate)) { + pybind11_fail( + "internals constructor: could not successfully initialize the tstate TSS key!"); + } + PYBIND11_TLS_REPLACE_VALUE(tstate, curtstate); + + // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) + if (!PYBIND11_TLS_KEY_CREATE(loader_life_support_tls_key)) { + pybind11_fail("internals constructor: could not successfully initialize the " + "loader_life_support TSS key!"); + } + + istate = curtstate->interp; + registered_exception_translators.push_front(&translate_exception); + static_property_type = make_static_property_type(); + default_metaclass = make_default_metaclass(); +#ifdef Py_GIL_DISABLED + // Scale proportional to the number of cores. 2x is a heuristic to reduce contention. + auto num_shards + = static_cast(round_up_to_next_pow2(2 * std::thread::hardware_concurrency())); + if (num_shards == 0) { + num_shards = 1; + } + instance_shards.reset(new instance_map_shard[num_shards]); + instance_shards_mask = num_shards - 1; +#endif + } internals(const internals &other) = delete; internals &operator=(const internals &other) = delete; ~internals() { @@ -458,9 +488,12 @@ inline InternalsType **get_internals_pp_from_capsule_in_state_dict(dict &state_d PYBIND11_NOINLINE internals &get_internals() { auto **&internals_pp = get_internals_pp(); if (internals_pp && *internals_pp) { + // This is the fast path, everything is already setup, just return it return **internals_pp; } + // Slow path, something needs fetched from the state dict or created + // Cannot use py::gil_scoped_acquire inside get_internals since that calls get_internals. gil_scoped_acquire_simple gil; error_scope err_scope; @@ -468,8 +501,12 @@ PYBIND11_NOINLINE internals &get_internals() { dict state_dict = get_python_state_dict(); internals_pp = get_internals_pp_from_capsule_in_state_dict(state_dict, PYBIND11_INTERNALS_ID); + if (!internals_pp) { + internals_pp = new internals *(nullptr); + state_dict[PYBIND11_INTERNALS_ID] = capsule(reinterpret_cast(internals_pp)); + } - if (internals_pp && *internals_pp) { + if (*internals_pp) { // We loaded the internals through `state_dict`, which means that our `error_already_set` // and `builtin_exception` may be different local classes than the ones set up in the // initial exception translator, below, so add another for our local exception classes. @@ -486,43 +523,16 @@ PYBIND11_NOINLINE internals &get_internals() { } #endif } else { - if (!internals_pp) { - internals_pp = new internals *(nullptr); - state_dict[PYBIND11_INTERNALS_ID] = capsule(reinterpret_cast(internals_pp)); - } - - auto *&internals_ptr = *internals_pp; + auto &internals_ptr = *internals_pp; internals_ptr = new internals(); - PyThreadState *tstate = PyThreadState_Get(); - // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) - if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->tstate)) { - pybind11_fail("get_internals: could not successfully initialize the tstate TSS key!"); + if (!internals_ptr->instance_base) { + // This calls get_internals, so cannot be called from within the internals constructor + // called above because internals_ptr must be set before get_internals is called again + internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass); } - PYBIND11_TLS_REPLACE_VALUE(internals_ptr->tstate, tstate); - - // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) - if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->loader_life_support_tls_key)) { - pybind11_fail("get_internals: could not successfully initialize the " - "loader_life_support TSS key!"); - } - - internals_ptr->istate = tstate->interp; - internals_ptr->registered_exception_translators.push_front(&translate_exception); - internals_ptr->static_property_type = make_static_property_type(); - internals_ptr->default_metaclass = make_default_metaclass(); - internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass); -#ifdef Py_GIL_DISABLED - // Scale proportional to the number of cores. 2x is a heuristic to reduce contention. - auto num_shards - = static_cast(round_up_to_next_pow2(2 * std::thread::hardware_concurrency())); - if (num_shards == 0) { - num_shards = 1; - } - internals_ptr->instance_shards.reset(new instance_map_shard[num_shards]); - internals_ptr->instance_shards_mask = num_shards - 1; -#endif // Py_GIL_DISABLED } + return **internals_pp; } From d0baf278e4cc35673f183c81b25b06868615f7ba Mon Sep 17 00:00:00 2001 From: b-pass Date: Mon, 14 Apr 2025 21:30:35 -0400 Subject: [PATCH 14/35] Move round_up_to_next_pow2 function upwards --- include/pybind11/detail/internals.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 01bc946606..0484016967 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -151,6 +151,20 @@ struct instance_map_shard { static_assert(sizeof(instance_map_shard) % 64 == 0, "instance_map_shard size is not a multiple of 64 bytes"); + +inline uint64_t round_up_to_next_pow2(uint64_t x) { + // Round-up to the next power of two. + // See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + x--; + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + x |= (x >> 32); + x++; + return x; +} #endif /// Internal data structure used to track registered instances and types. @@ -453,20 +467,6 @@ inline object get_python_state_dict() { return state_dict; } -inline uint64_t round_up_to_next_pow2(uint64_t x) { - // Round-up to the next power of two. - // See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - x--; - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - x |= (x >> 32); - x++; - return x; -} - template inline InternalsType **get_internals_pp_from_capsule_in_state_dict(dict &state_dict, char const *state_dict_key) { From 2d774be8657a06da1ad0f8c73a5b0f474b536296 Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 9 May 2025 17:11:13 -0400 Subject: [PATCH 15/35] Remove redundant forward decl --- include/pybind11/detail/internals.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 0484016967..5084892643 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -339,9 +339,6 @@ inline internals **&get_internals_pp() { return s_internals_pp; } -// forward decl -inline void translate_exception(std::exception_ptr); - template >::value, int> = 0> bool handle_nested_exception(const T &exc, const std::exception_ptr &p) { From df183343bc73a643f21373759011b9fae6621bf6 Mon Sep 17 00:00:00 2001 From: b-pass Date: Thu, 8 May 2025 23:22:57 -0400 Subject: [PATCH 16/35] Add a python-driven subinterpreter test --- tests/CMakeLists.txt | 14 +++++++ tests/mod_test_interpreters.cpp | 16 ++++++++ tests/test_interpreters.py | 71 +++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+) create mode 100644 tests/mod_test_interpreters.cpp create mode 100644 tests/test_interpreters.py diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 69976f745a..51e3c16bbe 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -148,6 +148,7 @@ set(PYBIND11_TEST_FILES test_exceptions test_factory_constructors test_gil_scoped + test_interpreters.py test_iostream test_kwargs_and_defaults test_local_bindings @@ -562,6 +563,19 @@ add_custom_target( WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" USES_TERMINAL) +if(NOT PYBIND11_CUDA_TESTS) + # This module doesn't get mixed with other test modules because those aren't subinterpreter safe. + pybind11_add_module(mod_test_interpreters THIN_LTO mod_test_interpreters.cpp) + set_target_properties(mod_test_interpreters PROPERTIES LIBRARY_OUTPUT_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}") + foreach(config ${CMAKE_CONFIGURATION_TYPES}) + string(TOUPPER ${config} config) + set_target_properties(mod_test_interpreters PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${config} + "${CMAKE_CURRENT_BINARY_DIR}") + endforeach() + add_dependencies(pytest mod_test_interpreters) +endif() + if(PYBIND11_TEST_OVERRIDE) add_custom_command( TARGET pytest diff --git a/tests/mod_test_interpreters.cpp b/tests/mod_test_interpreters.cpp new file mode 100644 index 0000000000..70a7662aa9 --- /dev/null +++ b/tests/mod_test_interpreters.cpp @@ -0,0 +1,16 @@ +#include + +namespace py = pybind11; + +/* Simple test module/test class to check that the referenced internals data of external pybind11 + * modules are different across subinterpreters + */ + +PYBIND11_MODULE(mod_test_interpreters, + m, + py::mod_multi_interpreter_one_gil(), + py::mod_gil_not_used(), + py::mod_per_interpreter_gil()) { + m.def("internals_at", + []() { return reinterpret_cast(&py::detail::get_internals()); }); +} diff --git a/tests/test_interpreters.py b/tests/test_interpreters.py new file mode 100644 index 0000000000..788bfac6ef --- /dev/null +++ b/tests/test_interpreters.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import os +import pickle +import sys + +import pytest + + +def test_interpreters(): + """Makes sure the internals object differs across subinterpreters""" + + if sys.version_info < (3, 12): + pytest.skip("Test requires independent subinterpreter support (3.12+)") + return + + sys.path.append(".") + + i = None + try: + # 3.14+ + import interpreters as i + except ImportError: + try: + # 3.13 + import _interpreters as i + except ImportError: + try: + # 3.12 + import _xxsubinterpreters as i + except ImportError: + pytest.skip("Test requires a the interpreters stdlib module") + return + + import mod_test_interpreters as m + + code = """ +import mod_test_interpreters as m +import pickle +with open(pipeo, 'wb') as f: + pickle.dump(m.internals_at(), f) +""" + + interp1 = i.create() + interp2 = i.create() + try: + pipei, pipeo = os.pipe() + i.run_string(interp1, code, shared={"pipeo": pipeo}) + with open(pipei, "rb") as f: + res1 = pickle.load(f) + + pipei, pipeo = os.pipe() + i.run_string(interp2, code, shared={"pipeo": pipeo}) + with open(pipei, "rb") as f: + res2 = pickle.load(f) + + # do this while the two interpreters are active + import mod_test_interpreters as m2 + + print(dir(m)) + print(dir(m2)) + assert m.internals_at() == m2.internals_at(), ( + "internals should be the same within the main interpreter" + ) + finally: + i.destroy(interp1) + i.destroy(interp2) + + assert res1 != m.internals_at(), "internals should differ from main interpreter" + assert res2 != m.internals_at(), "internals should differ from main interpreter" + assert res1 != res2, "internals should differ between interpreters" From c8f43ec69d492f2187583329c750f8fdacff5336 Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 9 May 2025 19:00:15 -0400 Subject: [PATCH 17/35] Disable the python subinterpreter test on emscripten Can't load the native-built cpp modules. --- tests/test_interpreters.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_interpreters.py b/tests/test_interpreters.py index 788bfac6ef..6094c7e993 100644 --- a/tests/test_interpreters.py +++ b/tests/test_interpreters.py @@ -7,13 +7,13 @@ import pytest +@pytest.mark.skipif( + sys.platform.startswith("emscripten") or sys.version_info < (3, 12), + reason="Requires indepedent subinterpreter support", +) def test_interpreters(): """Makes sure the internals object differs across subinterpreters""" - if sys.version_info < (3, 12): - pytest.skip("Test requires independent subinterpreter support (3.12+)") - return - sys.path.append(".") i = None From 3a12e108e179ead41723a0859c2187c145472a61 Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 9 May 2025 19:01:35 -0400 Subject: [PATCH 18/35] Switch the internals pointer pointer to a unique_ptr pointer --- include/pybind11/detail/internals.h | 30 +++++++++++++-------------- include/pybind11/embed.h | 12 +++++------ tests/test_embed/test_interpreter.cpp | 10 ++++----- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 5084892643..1d50c1123b 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -313,12 +313,12 @@ inline std::atomic &get_interpreter_counter() { /// Each module locally stores a pointer to the `internals` data. The data /// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`. -inline internals **&get_internals_pp() { +inline std::unique_ptr *&get_internals_pp() { #ifdef PYBIND11_SUBINTERPRETER_SUPPORT if (get_interpreter_counter() > 1) { // Internals is one per interpreter. When multiple interpreters are alive in different // threads we have to allow them to have different internals, so we need a thread_local. - static thread_local internals **t_internals_pp = nullptr; + static thread_local std::unique_ptr *t_internals_pp = nullptr; static thread_local PyInterpreterState *istate_cached = nullptr; // Whenever the interpreter changes on the current thread we need to invalidate the // internals_pp so that it can be pulled from the interpreter's state dict. That is slow, @@ -335,7 +335,7 @@ inline internals **&get_internals_pp() { return t_internals_pp; } #endif - static internals **s_internals_pp = nullptr; + static std::unique_ptr *s_internals_pp = nullptr; return s_internals_pp; } @@ -465,8 +465,8 @@ inline object get_python_state_dict() { } template -inline InternalsType **get_internals_pp_from_capsule_in_state_dict(dict &state_dict, - char const *state_dict_key) { +inline std::unique_ptr * +get_internals_pp_from_capsule_in_state_dict(dict &state_dict, char const *state_dict_key) { auto internals_obj = reinterpret_steal(dict_getitemstringref(state_dict.ptr(), state_dict_key)); if (internals_obj) { @@ -476,14 +476,14 @@ inline InternalsType **get_internals_pp_from_capsule_in_state_dict(dict &state_d "pybind11::detail::get_internals_pp_from_capsule_in_state_dict() FAILED"); throw error_already_set(); } - return reinterpret_cast(raw_ptr); + return reinterpret_cast *>(raw_ptr); } return nullptr; } /// Return a reference to the current `internals` data PYBIND11_NOINLINE internals &get_internals() { - auto **&internals_pp = get_internals_pp(); + auto *&internals_pp = get_internals_pp(); if (internals_pp && *internals_pp) { // This is the fast path, everything is already setup, just return it return **internals_pp; @@ -499,7 +499,7 @@ PYBIND11_NOINLINE internals &get_internals() { internals_pp = get_internals_pp_from_capsule_in_state_dict(state_dict, PYBIND11_INTERNALS_ID); if (!internals_pp) { - internals_pp = new internals *(nullptr); + internals_pp = new std::unique_ptr; state_dict[PYBIND11_INTERNALS_ID] = capsule(reinterpret_cast(internals_pp)); } @@ -521,7 +521,7 @@ PYBIND11_NOINLINE internals &get_internals() { #endif } else { auto &internals_ptr = *internals_pp; - internals_ptr = new internals(); + internals_ptr.reset(new internals()); if (!internals_ptr->instance_base) { // This calls get_internals, so cannot be called from within the internals constructor @@ -544,12 +544,12 @@ struct local_internals { std::forward_list registered_exception_translators; }; -inline local_internals **&get_local_internals_pp() { +inline std::unique_ptr *&get_local_internals_pp() { #ifdef PYBIND11_SUBINTERPRETER_SUPPORT if (get_interpreter_counter() > 1) { // Internals is one per interpreter. When multiple interpreters are alive in different // threads we have to allow them to have different internals, so we need a thread_local. - static thread_local local_internals **t_internals_pp = nullptr; + static thread_local std::unique_ptr *t_internals_pp = nullptr; static thread_local PyInterpreterState *istate_cached = nullptr; // Whenever the interpreter changes on the current thread we need to invalidate the // internals_pp so that it can be pulled from the interpreter's state dict. That is slow, @@ -566,7 +566,7 @@ inline local_internals **&get_local_internals_pp() { return t_internals_pp; } #endif - static local_internals **s_internals_pp = nullptr; + static std::unique_ptr *s_internals_pp = nullptr; return s_internals_pp; } @@ -582,7 +582,7 @@ inline char const *get_local_internals_id() { /// Works like `get_internals`, but for things which are locally registered. inline local_internals &get_local_internals() { - auto **&local_internals_pp = get_local_internals_pp(); + auto *&local_internals_pp = get_local_internals_pp(); if (local_internals_pp && *local_internals_pp) { return **local_internals_pp; } @@ -595,12 +595,12 @@ inline local_internals &get_local_internals() { local_internals_pp = get_internals_pp_from_capsule_in_state_dict( state_dict, get_local_internals_id()); if (!local_internals_pp) { - local_internals_pp = new local_internals *(nullptr); + local_internals_pp = new std::unique_ptr; state_dict[get_local_internals_id()] = capsule(reinterpret_cast(local_internals_pp)); } if (!*local_internals_pp) { - *local_internals_pp = new local_internals(); + local_internals_pp->reset(new local_internals()); } return **local_internals_pp; diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 9d964edee2..92d6d0764d 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -237,8 +237,8 @@ inline void finalize_interpreter() { // Get the internals pointer (without creating it if it doesn't exist). It's possible for the // internals to be created during Py_Finalize() (e.g. if a py::capsule calls `get_internals()` // during destruction), so we get the pointer-pointer here and check it after Py_Finalize(). - auto **&internals_ptr_ptr = detail::get_internals_pp(); - auto **&local_internals_ptr_ptr = detail::get_local_internals_pp(); + auto *&internals_ptr_ptr = detail::get_internals_pp(); + auto *&local_internals_ptr_ptr = detail::get_local_internals_pp(); { dict state_dict = detail::get_python_state_dict(); internals_ptr_ptr = detail::get_internals_pp_from_capsule_in_state_dict( @@ -251,15 +251,13 @@ inline void finalize_interpreter() { Py_Finalize(); if (internals_ptr_ptr) { - delete *internals_ptr_ptr; - *internals_ptr_ptr = nullptr; + internals_ptr_ptr->reset(); } // Local internals contains data managed by the current interpreter, so we must clear them to // avoid undefined behaviors when initializing another interpreter - if (local_internals_ptr_ptr && *local_internals_ptr_ptr) { - delete *local_internals_ptr_ptr; - *local_internals_ptr_ptr = nullptr; + if (local_internals_ptr_ptr) { + local_internals_ptr_ptr->reset(); } // We know there is no interpreter alive now, so we can reset the count diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index 7be18b7065..d704e0241a 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -261,8 +261,8 @@ bool has_state_dict_internals_obj() { } bool has_pybind11_internals_static() { - auto **&ipp = py::detail::get_internals_pp(); - return (ipp != nullptr) && (*ipp != nullptr); + auto *&ipp = py::detail::get_internals_pp(); + return ipp && (*ipp != nullptr); } TEST_CASE("Restart the interpreter") { @@ -274,7 +274,7 @@ TEST_CASE("Restart the interpreter") { == 123); // local and foreign module internals should point to the same internals: - REQUIRE(reinterpret_cast(*py::detail::get_internals_pp()) + REQUIRE(reinterpret_cast(py::detail::get_internals_pp()->get()) == py::module_::import("external_module").attr("internals_at")().cast()); // Restart the interpreter. @@ -290,7 +290,7 @@ TEST_CASE("Restart the interpreter") { pybind11::detail::get_internals(); REQUIRE(has_state_dict_internals_obj()); REQUIRE(has_pybind11_internals_static()); - REQUIRE(reinterpret_cast(*py::detail::get_internals_pp()) + REQUIRE(reinterpret_cast(py::detail::get_internals_pp()->get()) == py::module_::import("external_module").attr("internals_at")().cast()); // Make sure that an interpreter with no get_internals() created until finalize still gets the @@ -357,7 +357,7 @@ TEST_CASE("Subinterpreter") { auto ext_int = py::module_::import("external_module").attr("internals_at")().cast(); py::detail::get_internals(); REQUIRE(has_pybind11_internals_static()); - REQUIRE(reinterpret_cast(*py::detail::get_internals_pp()) == ext_int); + REQUIRE(reinterpret_cast(py::detail::get_internals_pp()->get()) == ext_int); #else // This static is still defined REQUIRE(has_pybind11_internals_static()); From 46e609f9b489d68f409d5cad149d761dd41d9ddd Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 9 May 2025 19:07:31 -0400 Subject: [PATCH 19/35] Spelling --- tests/test_interpreters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_interpreters.py b/tests/test_interpreters.py index 6094c7e993..1be24d4220 100644 --- a/tests/test_interpreters.py +++ b/tests/test_interpreters.py @@ -9,7 +9,7 @@ @pytest.mark.skipif( sys.platform.startswith("emscripten") or sys.version_info < (3, 12), - reason="Requires indepedent subinterpreter support", + reason="Requires independent subinterpreter support", ) def test_interpreters(): """Makes sure the internals object differs across subinterpreters""" From c790bfa449596e4922d7902dbc37b814a066dbde Mon Sep 17 00:00:00 2001 From: b-pass Date: Fri, 9 May 2025 19:34:34 -0400 Subject: [PATCH 20/35] Fix clang-tidy warning, compare pointer to nullptr --- tests/test_embed/test_interpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index d704e0241a..0c0077b4ba 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -262,7 +262,7 @@ bool has_state_dict_internals_obj() { bool has_pybind11_internals_static() { auto *&ipp = py::detail::get_internals_pp(); - return ipp && (*ipp != nullptr); + return (ipp != nullptr) && *ipp; } TEST_CASE("Restart the interpreter") { From 427ea25a270d9fbec47281404a6d7e553b9b72a4 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 10 May 2025 18:22:36 -0400 Subject: [PATCH 21/35] Rename get_interpreter_counter to get_num_interpreters_seen --- include/pybind11/detail/common.h | 2 +- include/pybind11/detail/internals.h | 6 ++--- include/pybind11/embed.h | 4 +-- tests/test_embed/test_interpreter.cpp | 14 +++++----- tests/test_interpreters.py | 38 +++++++++++---------------- 5 files changed, 28 insertions(+), 36 deletions(-) diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index 7bc937b864..2c197d1b85 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -414,7 +414,7 @@ PYBIND11_WARNING_DISABLE_CLANG("-Wgnu-zero-variadic-macro-arguments") return m.ptr(); \ } \ int PYBIND11_CONCAT(pybind11_exec_, name)(PyObject * pm) { \ - pybind11::detail::get_interpreter_counter()++; \ + pybind11::detail::get_num_interpreters_seen() += 1; \ try { \ auto m = pybind11::reinterpret_borrow<::pybind11::module_>(pm); \ PYBIND11_CONCAT(pybind11_init_, name)(m); \ diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 1d50c1123b..493104a320 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -306,7 +306,7 @@ inline PyThreadState *get_thread_state_unchecked() { /// We use this counter to figure out if there are or have been multiple subinterpreters active at /// any point. This must never decrease while any interpreter may be running in any thread! -inline std::atomic &get_interpreter_counter() { +inline std::atomic &get_num_interpreters_seen() { static std::atomic counter(0); return counter; } @@ -315,7 +315,7 @@ inline std::atomic &get_interpreter_counter() { /// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`. inline std::unique_ptr *&get_internals_pp() { #ifdef PYBIND11_SUBINTERPRETER_SUPPORT - if (get_interpreter_counter() > 1) { + if (get_num_interpreters_seen() > 1) { // Internals is one per interpreter. When multiple interpreters are alive in different // threads we have to allow them to have different internals, so we need a thread_local. static thread_local std::unique_ptr *t_internals_pp = nullptr; @@ -546,7 +546,7 @@ struct local_internals { inline std::unique_ptr *&get_local_internals_pp() { #ifdef PYBIND11_SUBINTERPRETER_SUPPORT - if (get_interpreter_counter() > 1) { + if (get_num_interpreters_seen() > 1) { // Internals is one per interpreter. When multiple interpreters are alive in different // threads we have to allow them to have different internals, so we need a thread_local. static thread_local std::unique_ptr *t_internals_pp = nullptr; diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 92d6d0764d..3e981dc591 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -195,7 +195,7 @@ inline void initialize_interpreter(bool init_signal_handlers = true, #endif // There is exactly one interpreter alive currently. - detail::get_interpreter_counter() = 1; + detail::get_num_interpreters_seen() = 1; } /** \rst @@ -261,7 +261,7 @@ inline void finalize_interpreter() { } // We know there is no interpreter alive now, so we can reset the count - detail::get_interpreter_counter() = 0; + detail::get_num_interpreters_seen() = 0; } /** \rst diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index 0c0077b4ba..40da08b86f 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -343,7 +343,7 @@ TEST_CASE("Subinterpreter") { auto *main_tstate = PyThreadState_Get(); auto *sub_tstate = Py_NewInterpreter(); - py::detail::get_interpreter_counter()++; + py::detail::get_num_interpreters_seen()++; // Subinterpreters get their own copy of builtins. REQUIRE_FALSE(has_state_dict_internals_obj()); @@ -378,7 +378,7 @@ TEST_CASE("Subinterpreter") { // Restore main interpreter. Py_EndInterpreter(sub_tstate); - py::detail::get_interpreter_counter() = 1; + py::detail::get_num_interpreters_seen() = 1; PyThreadState_Swap(main_tstate); REQUIRE(py::hasattr(py::module_::import("__main__"), "main_tag")); @@ -398,7 +398,7 @@ TEST_CASE("Multiple Subinterpreters") { /// Create and switch to a subinterpreter. auto *sub1_tstate = Py_NewInterpreter(); - py::detail::get_interpreter_counter()++; + py::detail::get_num_interpreters_seen()++; py::list(py::module_::import("sys").attr("path")).append(py::str(".")); @@ -414,7 +414,7 @@ TEST_CASE("Multiple Subinterpreters") { // Create another interpreter auto *sub2_tstate = Py_NewInterpreter(); - py::detail::get_interpreter_counter()++; + py::detail::get_num_interpreters_seen()++; py::list(py::module_::import("sys").attr("path")).append(py::str(".")); @@ -450,7 +450,7 @@ TEST_CASE("Multiple Subinterpreters") { PyThreadState_Swap(sub2_tstate); Py_EndInterpreter(sub2_tstate); - py::detail::get_interpreter_counter() = 1; + py::detail::get_num_interpreters_seen() = 1; PyThreadState_Swap(main_tstate); } #endif @@ -496,7 +496,7 @@ TEST_CASE("Per-Subinterpreter GIL") { auto status = Py_NewInterpreterFromConfig(&sub, &cfg); T_REQUIRE(!PyStatus_IsError(status)); - py::detail::get_interpreter_counter()++; + py::detail::get_num_interpreters_seen()++; py::list(py::module_::import("sys").attr("path")).append(py::str(".")); @@ -590,7 +590,7 @@ TEST_CASE("Per-Subinterpreter GIL") { == "1"); // the threads are stopped. we can now lower this for the rest of the test - py::detail::get_interpreter_counter() = 1; + py::detail::get_num_interpreters_seen() = 1; // make sure nothing unexpected happened inside the threads, now that they are completed REQUIRE(failure == 0); diff --git a/tests/test_interpreters.py b/tests/test_interpreters.py index 1be24d4220..ed2a5ae27e 100644 --- a/tests/test_interpreters.py +++ b/tests/test_interpreters.py @@ -8,29 +8,21 @@ @pytest.mark.skipif( - sys.platform.startswith("emscripten") or sys.version_info < (3, 12), - reason="Requires independent subinterpreter support", + sys.platform.startswith("emscripten"), reason="Requires loadable modules" ) def test_interpreters(): """Makes sure the internals object differs across subinterpreters""" sys.path.append(".") - i = None - try: - # 3.14+ - import interpreters as i - except ImportError: - try: - # 3.13 - import _interpreters as i - except ImportError: - try: - # 3.12 - import _xxsubinterpreters as i - except ImportError: - pytest.skip("Test requires a the interpreters stdlib module") - return + if sys.version_info >= (3, 14): + import interpreters + elif sys.version_info >= (3, 13): + import _interpreters as interpreters + elif sys.version_info >= (3, 12): + import _xxsubinterpreters as interpreters + else: + pytest.skip("Test requires a the interpreters stdlib module") import mod_test_interpreters as m @@ -41,16 +33,16 @@ def test_interpreters(): pickle.dump(m.internals_at(), f) """ - interp1 = i.create() - interp2 = i.create() + interp1 = interpreters.create() + interp2 = interpreters.create() try: pipei, pipeo = os.pipe() - i.run_string(interp1, code, shared={"pipeo": pipeo}) + interpreters.run_string(interp1, code, shared={"pipeo": pipeo}) with open(pipei, "rb") as f: res1 = pickle.load(f) pipei, pipeo = os.pipe() - i.run_string(interp2, code, shared={"pipeo": pipeo}) + interpreters.run_string(interp2, code, shared={"pipeo": pipeo}) with open(pipei, "rb") as f: res2 = pickle.load(f) @@ -63,8 +55,8 @@ def test_interpreters(): "internals should be the same within the main interpreter" ) finally: - i.destroy(interp1) - i.destroy(interp2) + interpreters.destroy(interp1) + interpreters.destroy(interp2) assert res1 != m.internals_at(), "internals should differ from main interpreter" assert res2 != m.internals_at(), "internals should differ from main interpreter" From 048dd042f4e0e1554d7f15e1bc54e64d19468b2e Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 10 May 2025 18:28:58 -0400 Subject: [PATCH 22/35] Try simplifying the test's cmake set_target_properties --- tests/CMakeLists.txt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 51e3c16bbe..c4bafe4f83 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -567,12 +567,7 @@ if(NOT PYBIND11_CUDA_TESTS) # This module doesn't get mixed with other test modules because those aren't subinterpreter safe. pybind11_add_module(mod_test_interpreters THIN_LTO mod_test_interpreters.cpp) set_target_properties(mod_test_interpreters PROPERTIES LIBRARY_OUTPUT_DIRECTORY - "${CMAKE_CURRENT_BINARY_DIR}") - foreach(config ${CMAKE_CONFIGURATION_TYPES}) - string(TOUPPER ${config} config) - set_target_properties(mod_test_interpreters PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${config} - "${CMAKE_CURRENT_BINARY_DIR}") - endforeach() + "$<1:${CMAKE_CURRENT_BINARY_DIR}>") add_dependencies(pytest mod_test_interpreters) endif() From 82dda7952def5f6fc7f12667adbd7bb89e6c5152 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 10 May 2025 19:16:24 -0400 Subject: [PATCH 23/35] Replace mod_* tags with a single tag w/enum Update tests accordingly --- include/pybind11/pybind11.h | 53 ++++++++++------------------ tests/mod_test_interpreters.cpp | 3 +- tests/test_embed/external_module.cpp | 4 +-- tests/test_interpreters.py | 9 +++-- 4 files changed, 29 insertions(+), 40 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index bb6a593c92..686e9ad3e0 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1267,24 +1267,19 @@ class mod_gil_not_used { bool flag_; }; -// Use to activate Py_MOD_PER_INTERPRETER_GIL_SUPPORTED -class mod_per_interpreter_gil { +class multiple_interpreters { public: - explicit mod_per_interpreter_gil(bool flag = true) : flag_(flag) {} - bool flag() const { return flag_; } - -private: - bool flag_; -}; + enum level { + not_supported, /// Use to activate Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED + shared_gil, /// Use to activate Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED + per_interpreter_gil /// Use to activate Py_MOD_PER_INTERPRETER_GIL_SUPPORTED + }; -// Use to activate Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED -class mod_multi_interpreter_one_gil { -public: - explicit mod_multi_interpreter_one_gil(bool flag = true) : flag_(flag) {} - bool flag() const { return flag_; } + explicit multiple_interpreters(level l) : level_(l) {} + level value() const { return level_; } private: - bool flag_; + level level_; }; PYBIND11_NAMESPACE_BEGIN(detail) @@ -1302,29 +1297,20 @@ inline bool gil_not_used_option(F &&, O &&...o) { } #ifdef Py_mod_multiple_interpreters -inline void *multi_interp_option() { return Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED; } -template -void *multi_interp_option(F &&, O &&...o); +inline void *multi_interp_slot() { return Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED; } template -void *multi_interp_option(mod_multi_interpreter_one_gil f, O &&...o); -template -inline void *multi_interp_option(mod_per_interpreter_gil f, O &&...o) { - if (f.flag()) { +inline void *multi_interp_slot(multiple_interpreters mi, O &&...o) { + if (mi.value() == multiple_interpreters::per_interpreter_gil) { return Py_MOD_PER_INTERPRETER_GIL_SUPPORTED; + } else if (mi.value() == multiple_interpreters::shared_gil) { + return Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED; + } else { + return Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED; } - return multi_interp_option(o...); -} -template -inline void *multi_interp_option(mod_multi_interpreter_one_gil f, O &&...o) { - void *others = multi_interp_option(o...); - if (!f.flag() || others == Py_MOD_PER_INTERPRETER_GIL_SUPPORTED) { - return others; - } - return Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED; } template -inline void *multi_interp_option(F &&, O &&...o) { - return multi_interp_option(o...); +inline void *multi_interp_slot(F &&, O &&...o) { + return multi_interp_slot(o...); } #endif @@ -1506,8 +1492,7 @@ class module_ : public object { if (next_slot >= term_slot) { pybind11_fail("initialize_multiphase_module_def: not enough space in slots"); } - slots[next_slot++] - = {Py_mod_multiple_interpreters, detail::multi_interp_option(options...)}; + slots[next_slot++] = {Py_mod_multiple_interpreters, detail::multi_interp_slot(options...)}; #endif if (detail::gil_not_used_option(options...)) { diff --git a/tests/mod_test_interpreters.cpp b/tests/mod_test_interpreters.cpp index 70a7662aa9..87136af4f5 100644 --- a/tests/mod_test_interpreters.cpp +++ b/tests/mod_test_interpreters.cpp @@ -8,9 +8,8 @@ namespace py = pybind11; PYBIND11_MODULE(mod_test_interpreters, m, - py::mod_multi_interpreter_one_gil(), py::mod_gil_not_used(), - py::mod_per_interpreter_gil()) { + py::multiple_interpreters(py::multiple_interpreters::per_interpreter_gil)) { m.def("internals_at", []() { return reinterpret_cast(&py::detail::get_internals()); }); } diff --git a/tests/test_embed/external_module.cpp b/tests/test_embed/external_module.cpp index d4f82d38c5..bf937586fd 100644 --- a/tests/test_embed/external_module.cpp +++ b/tests/test_embed/external_module.cpp @@ -8,9 +8,9 @@ namespace py = pybind11; PYBIND11_MODULE(external_module, m, - py::mod_multi_interpreter_one_gil(), py::mod_gil_not_used(), - py::mod_per_interpreter_gil()) { + py::multiple_interpreters(py::multiple_interpreters::per_interpreter_gil)) { + class A { public: explicit A(int value) : v{value} {}; diff --git a/tests/test_interpreters.py b/tests/test_interpreters.py index ed2a5ae27e..e0da08fbba 100644 --- a/tests/test_interpreters.py +++ b/tests/test_interpreters.py @@ -49,8 +49,6 @@ def test_interpreters(): # do this while the two interpreters are active import mod_test_interpreters as m2 - print(dir(m)) - print(dir(m2)) assert m.internals_at() == m2.internals_at(), ( "internals should be the same within the main interpreter" ) @@ -61,3 +59,10 @@ def test_interpreters(): assert res1 != m.internals_at(), "internals should differ from main interpreter" assert res2 != m.internals_at(), "internals should differ from main interpreter" assert res1 != res2, "internals should differ between interpreters" + + # do this after the two interpreters are destroyed and only one remains + import mod_test_interpreters as m3 + + assert m.internals_at() == m3.internals_at(), ( + "internals should be the same within the main interpreter" + ) From eb460c04866e48a70bd05cd979c7a6b41df8b22a Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 10 May 2025 19:41:12 -0400 Subject: [PATCH 24/35] Add a test for shared-GIL (legacy) subinterpreters --- tests/CMakeLists.txt | 5 ++- tests/mod_test_interpreters2.cpp | 14 ++++++++ tests/test_interpreters.py | 61 ++++++++++++++++++++++++++++++-- 3 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 tests/mod_test_interpreters2.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c4bafe4f83..0a37d4fe11 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -566,9 +566,12 @@ add_custom_target( if(NOT PYBIND11_CUDA_TESTS) # This module doesn't get mixed with other test modules because those aren't subinterpreter safe. pybind11_add_module(mod_test_interpreters THIN_LTO mod_test_interpreters.cpp) + pybind11_add_module(mod_test_interpreters2 THIN_LTO mod_test_interpreters2.cpp) set_target_properties(mod_test_interpreters PROPERTIES LIBRARY_OUTPUT_DIRECTORY "$<1:${CMAKE_CURRENT_BINARY_DIR}>") - add_dependencies(pytest mod_test_interpreters) + set_target_properties(mod_test_interpreters2 PROPERTIES LIBRARY_OUTPUT_DIRECTORY + "$<1:${CMAKE_CURRENT_BINARY_DIR}>") + add_dependencies(pytest mod_test_interpreters mod_test_interpreters2) endif() if(PYBIND11_TEST_OVERRIDE) diff --git a/tests/mod_test_interpreters2.cpp b/tests/mod_test_interpreters2.cpp new file mode 100644 index 0000000000..e69589666b --- /dev/null +++ b/tests/mod_test_interpreters2.cpp @@ -0,0 +1,14 @@ +#include + +namespace py = pybind11; + +/* Simple test module/test class to check that the referenced internals data of external pybind11 + * modules are different across subinterpreters + */ + +PYBIND11_MODULE(mod_test_interpreters2, + m, + py::multiple_interpreters(py::multiple_interpreters::shared_gil)) { + m.def("internals_at", + []() { return reinterpret_cast(&py::detail::get_internals()); }); +} diff --git a/tests/test_interpreters.py b/tests/test_interpreters.py index e0da08fbba..5c280a17f5 100644 --- a/tests/test_interpreters.py +++ b/tests/test_interpreters.py @@ -10,8 +10,8 @@ @pytest.mark.skipif( sys.platform.startswith("emscripten"), reason="Requires loadable modules" ) -def test_interpreters(): - """Makes sure the internals object differs across subinterpreters""" +def test_independent_subinterpreters(): + """Makes sure the internals object differs across independent subinterpreters""" sys.path.append(".") @@ -36,6 +36,8 @@ def test_interpreters(): interp1 = interpreters.create() interp2 = interpreters.create() try: + res0 = interpreters.run_string(interp1, "import mod_test_interpreters2") + pipei, pipeo = os.pipe() interpreters.run_string(interp1, code, shared={"pipeo": pipeo}) with open(pipei, "rb") as f: @@ -56,6 +58,9 @@ def test_interpreters(): interpreters.destroy(interp1) interpreters.destroy(interp2) + assert "does not support loading in subinterpreters" in res0.msg, ( + "cannot use shared_gil in a default subinterpreter" + ) assert res1 != m.internals_at(), "internals should differ from main interpreter" assert res2 != m.internals_at(), "internals should differ from main interpreter" assert res1 != res2, "internals should differ between interpreters" @@ -66,3 +71,55 @@ def test_interpreters(): assert m.internals_at() == m3.internals_at(), ( "internals should be the same within the main interpreter" ) + + +@pytest.mark.skipif( + sys.platform.startswith("emscripten"), reason="Requires loadable modules" +) +def test_dependent_subinterpreters(): + """Makes sure the internals object differs across subinterpreters""" + + sys.path.append(".") + + if sys.version_info >= (3, 14): + import interpreters + elif sys.version_info >= (3, 13): + import _interpreters as interpreters + elif sys.version_info >= (3, 12): + import _xxsubinterpreters as interpreters + else: + pytest.skip("Test requires a the interpreters stdlib module") + + import mod_test_interpreters2 as m + + code = """ +import mod_test_interpreters2 as m +import pickle +with open(pipeo, 'wb') as f: + pickle.dump(m.internals_at(), f) +""" + + interp1 = interpreters.create("legacy") + try: + pipei, pipeo = os.pipe() + interpreters.run_string(interp1, code, shared={"pipeo": pipeo}) + with open(pipei, "rb") as f: + res1 = pickle.load(f) + + # do this while the two interpreters are active + import mod_test_interpreters2 as m2 + + assert m.internals_at() == m2.internals_at(), ( + "internals should be the same within the main interpreter" + ) + finally: + interpreters.destroy(interp1) + + assert res1 != m.internals_at(), "internals should differ from main interpreter" + + # do this after the two interpreters are destroyed and only one remains + import mod_test_interpreters2 as m3 + + assert m.internals_at() == m3.internals_at(), ( + "internals should be the same within the main interpreter" + ) From d18e03e1b4f545a7d45291713c23f7f9c4604784 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 10 May 2025 20:03:50 -0400 Subject: [PATCH 25/35] Update test to work around differences in the various versions of interpreters modules --- tests/test_interpreters.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/test_interpreters.py b/tests/test_interpreters.py index 5c280a17f5..2a25971843 100644 --- a/tests/test_interpreters.py +++ b/tests/test_interpreters.py @@ -36,7 +36,12 @@ def test_independent_subinterpreters(): interp1 = interpreters.create() interp2 = interpreters.create() try: - res0 = interpreters.run_string(interp1, "import mod_test_interpreters2") + try: + res0 = interpreters.run_string(interp1, "import mod_test_interpreters2") + if res0 is not None: + res0 = res0.msg + except Exception as e: + res0 = str(e) pipei, pipeo = os.pipe() interpreters.run_string(interp1, code, shared={"pipeo": pipeo}) @@ -58,7 +63,7 @@ def test_independent_subinterpreters(): interpreters.destroy(interp1) interpreters.destroy(interp2) - assert "does not support loading in subinterpreters" in res0.msg, ( + assert "does not support loading in subinterpreters" in res0, ( "cannot use shared_gil in a default subinterpreter" ) assert res1 != m.internals_at(), "internals should differ from main interpreter" @@ -99,7 +104,11 @@ def test_dependent_subinterpreters(): pickle.dump(m.internals_at(), f) """ - interp1 = interpreters.create("legacy") + try: + interp1 = interpreters.create("legacy") + except TypeError: + pytest.skip("interpreters module needs to support legacy config") + try: pipei, pipeo = os.pipe() interpreters.run_string(interp1, code, shared={"pipeo": pipeo}) From 45f35c6c4f2af20076cd3d35bd81c393854f7676 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 10 May 2025 20:16:59 -0400 Subject: [PATCH 26/35] Fix unused parameter --- include/pybind11/pybind11.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 686e9ad3e0..f27680b6b3 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1305,7 +1305,7 @@ inline void *multi_interp_slot(multiple_interpreters mi, O &&...o) { } else if (mi.value() == multiple_interpreters::shared_gil) { return Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED; } else { - return Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED; + return multi_interp_slot(o...); } } template From 0550a1ba6be2afc68975ed685e4a90b827cdbec8 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sun, 11 May 2025 19:09:27 -0400 Subject: [PATCH 27/35] Rename tests and associated test modules. --- tests/CMakeLists.txt | 16 +++++++------- ...reters.cpp => mod_per_interpreter_gil.cpp} | 2 +- ...rs2.cpp => mod_shared_interpreter_gil.cpp} | 2 +- ...eters.py => test_multiple_interpreters.py} | 22 +++++++++---------- 4 files changed, 21 insertions(+), 21 deletions(-) rename tests/{mod_test_interpreters.cpp => mod_per_interpreter_gil.cpp} (92%) rename tests/{mod_test_interpreters2.cpp => mod_shared_interpreter_gil.cpp} (90%) rename tests/{test_interpreters.py => test_multiple_interpreters.py} (88%) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 0a37d4fe11..4720c58b9f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -148,13 +148,13 @@ set(PYBIND11_TEST_FILES test_exceptions test_factory_constructors test_gil_scoped - test_interpreters.py test_iostream test_kwargs_and_defaults test_local_bindings test_methods_and_attributes test_modules test_multiple_inheritance + test_multiple_interpreters.py test_native_enum test_numpy_array test_numpy_dtypes @@ -565,13 +565,13 @@ add_custom_target( if(NOT PYBIND11_CUDA_TESTS) # This module doesn't get mixed with other test modules because those aren't subinterpreter safe. - pybind11_add_module(mod_test_interpreters THIN_LTO mod_test_interpreters.cpp) - pybind11_add_module(mod_test_interpreters2 THIN_LTO mod_test_interpreters2.cpp) - set_target_properties(mod_test_interpreters PROPERTIES LIBRARY_OUTPUT_DIRECTORY - "$<1:${CMAKE_CURRENT_BINARY_DIR}>") - set_target_properties(mod_test_interpreters2 PROPERTIES LIBRARY_OUTPUT_DIRECTORY - "$<1:${CMAKE_CURRENT_BINARY_DIR}>") - add_dependencies(pytest mod_test_interpreters mod_test_interpreters2) + pybind11_add_module(mod_per_interpreter_gil THIN_LTO mod_per_interpreter_gil.cpp) + pybind11_add_module(mod_shared_interpreter_gil THIN_LTO mod_shared_interpreter_gil.cpp) + set_target_properties(mod_per_interpreter_gil PROPERTIES LIBRARY_OUTPUT_DIRECTORY + "$<1:${CMAKE_CURRENT_BINARY_DIR}>") + set_target_properties(mod_shared_interpreter_gil PROPERTIES LIBRARY_OUTPUT_DIRECTORY + "$<1:${CMAKE_CURRENT_BINARY_DIR}>") + add_dependencies(pytest mod_per_interpreter_gil mod_shared_interpreter_gil) endif() if(PYBIND11_TEST_OVERRIDE) diff --git a/tests/mod_test_interpreters.cpp b/tests/mod_per_interpreter_gil.cpp similarity index 92% rename from tests/mod_test_interpreters.cpp rename to tests/mod_per_interpreter_gil.cpp index 87136af4f5..7087025829 100644 --- a/tests/mod_test_interpreters.cpp +++ b/tests/mod_per_interpreter_gil.cpp @@ -6,7 +6,7 @@ namespace py = pybind11; * modules are different across subinterpreters */ -PYBIND11_MODULE(mod_test_interpreters, +PYBIND11_MODULE(mod_per_interpreter_gil, m, py::mod_gil_not_used(), py::multiple_interpreters(py::multiple_interpreters::per_interpreter_gil)) { diff --git a/tests/mod_test_interpreters2.cpp b/tests/mod_shared_interpreter_gil.cpp similarity index 90% rename from tests/mod_test_interpreters2.cpp rename to tests/mod_shared_interpreter_gil.cpp index e69589666b..af08800eca 100644 --- a/tests/mod_test_interpreters2.cpp +++ b/tests/mod_shared_interpreter_gil.cpp @@ -6,7 +6,7 @@ namespace py = pybind11; * modules are different across subinterpreters */ -PYBIND11_MODULE(mod_test_interpreters2, +PYBIND11_MODULE(mod_shared_interpreter_gil, m, py::multiple_interpreters(py::multiple_interpreters::shared_gil)) { m.def("internals_at", diff --git a/tests/test_interpreters.py b/tests/test_multiple_interpreters.py similarity index 88% rename from tests/test_interpreters.py rename to tests/test_multiple_interpreters.py index 2a25971843..d7321171bd 100644 --- a/tests/test_interpreters.py +++ b/tests/test_multiple_interpreters.py @@ -24,10 +24,10 @@ def test_independent_subinterpreters(): else: pytest.skip("Test requires a the interpreters stdlib module") - import mod_test_interpreters as m + import mod_per_interpreter_gil as m code = """ -import mod_test_interpreters as m +import mod_per_interpreter_gil as m import pickle with open(pipeo, 'wb') as f: pickle.dump(m.internals_at(), f) @@ -37,7 +37,7 @@ def test_independent_subinterpreters(): interp2 = interpreters.create() try: try: - res0 = interpreters.run_string(interp1, "import mod_test_interpreters2") + res0 = interpreters.run_string(interp1, "import mod_shared_interpreter_gil") if res0 is not None: res0 = res0.msg except Exception as e: @@ -54,7 +54,7 @@ def test_independent_subinterpreters(): res2 = pickle.load(f) # do this while the two interpreters are active - import mod_test_interpreters as m2 + import mod_per_interpreter_gil as m2 assert m.internals_at() == m2.internals_at(), ( "internals should be the same within the main interpreter" @@ -71,7 +71,7 @@ def test_independent_subinterpreters(): assert res1 != res2, "internals should differ between interpreters" # do this after the two interpreters are destroyed and only one remains - import mod_test_interpreters as m3 + import mod_per_interpreter_gil as m3 assert m.internals_at() == m3.internals_at(), ( "internals should be the same within the main interpreter" @@ -95,10 +95,10 @@ def test_dependent_subinterpreters(): else: pytest.skip("Test requires a the interpreters stdlib module") - import mod_test_interpreters2 as m + import mod_shared_interpreter_gil as m code = """ -import mod_test_interpreters2 as m +import mod_shared_interpreter_gil as m import pickle with open(pipeo, 'wb') as f: pickle.dump(m.internals_at(), f) @@ -115,8 +115,8 @@ def test_dependent_subinterpreters(): with open(pipei, "rb") as f: res1 = pickle.load(f) - # do this while the two interpreters are active - import mod_test_interpreters2 as m2 + # do this while the other interpreter is active + import mod_shared_interpreter_gil as m2 assert m.internals_at() == m2.internals_at(), ( "internals should be the same within the main interpreter" @@ -126,8 +126,8 @@ def test_dependent_subinterpreters(): assert res1 != m.internals_at(), "internals should differ from main interpreter" - # do this after the two interpreters are destroyed and only one remains - import mod_test_interpreters2 as m3 + # do this after the other interpreters are destroyed and only one remains + import mod_shared_interpreter_gil as m3 assert m.internals_at() == m3.internals_at(), ( "internals should be the same within the main interpreter" From dafeaec5557cbce939a5d455dbf68da97f3f5a64 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sun, 11 May 2025 22:08:00 -0400 Subject: [PATCH 28/35] Switch get_internals_pp to a template function --- include/pybind11/detail/internals.h | 61 ++++++++------------------- include/pybind11/embed.h | 4 +- include/pybind11/pybind11.h | 17 +++++--- tests/test_embed/test_interpreter.cpp | 13 ++++-- 4 files changed, 38 insertions(+), 57 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 493104a320..2f585ce8b9 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -251,6 +251,17 @@ struct internals { } }; +// the internals struct (above) is shared between all the modules. local_internals are only +// for a single module. Any changes made to internals may require an update to +// PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design, +// restricted to a single module. Whether a module has local internals or not should not +// impact any other modules, because the only things accessing the local internals is the +// module that contains them. +struct local_internals { + type_map registered_types_cpp; + std::forward_list registered_exception_translators; +}; + enum class holder_enum_t : uint8_t { undefined, std_unique_ptr, // Default, lacking interop with std::shared_ptr. @@ -311,14 +322,13 @@ inline std::atomic &get_num_interpreters_seen() { return counter; } -/// Each module locally stores a pointer to the `internals` data. The data -/// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`. -inline std::unique_ptr *&get_internals_pp() { +template +inline std::unique_ptr *&get_internals_pp() { #ifdef PYBIND11_SUBINTERPRETER_SUPPORT if (get_num_interpreters_seen() > 1) { // Internals is one per interpreter. When multiple interpreters are alive in different // threads we have to allow them to have different internals, so we need a thread_local. - static thread_local std::unique_ptr *t_internals_pp = nullptr; + static thread_local std::unique_ptr *t_internals_pp = nullptr; static thread_local PyInterpreterState *istate_cached = nullptr; // Whenever the interpreter changes on the current thread we need to invalidate the // internals_pp so that it can be pulled from the interpreter's state dict. That is slow, @@ -335,7 +345,7 @@ inline std::unique_ptr *&get_internals_pp() { return t_internals_pp; } #endif - static std::unique_ptr *s_internals_pp = nullptr; + static std::unique_ptr *s_internals_pp = nullptr; return s_internals_pp; } @@ -483,7 +493,7 @@ get_internals_pp_from_capsule_in_state_dict(dict &state_dict, char const *state_ /// Return a reference to the current `internals` data PYBIND11_NOINLINE internals &get_internals() { - auto *&internals_pp = get_internals_pp(); + auto *&internals_pp = get_internals_pp(); if (internals_pp && *internals_pp) { // This is the fast path, everything is already setup, just return it return **internals_pp; @@ -533,43 +543,6 @@ PYBIND11_NOINLINE internals &get_internals() { return **internals_pp; } -// the internals struct (above) is shared between all the modules. local_internals are only -// for a single module. Any changes made to internals may require an update to -// PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design, -// restricted to a single module. Whether a module has local internals or not should not -// impact any other modules, because the only things accessing the local internals is the -// module that contains them. -struct local_internals { - type_map registered_types_cpp; - std::forward_list registered_exception_translators; -}; - -inline std::unique_ptr *&get_local_internals_pp() { -#ifdef PYBIND11_SUBINTERPRETER_SUPPORT - if (get_num_interpreters_seen() > 1) { - // Internals is one per interpreter. When multiple interpreters are alive in different - // threads we have to allow them to have different internals, so we need a thread_local. - static thread_local std::unique_ptr *t_internals_pp = nullptr; - static thread_local PyInterpreterState *istate_cached = nullptr; - // Whenever the interpreter changes on the current thread we need to invalidate the - // internals_pp so that it can be pulled from the interpreter's state dict. That is slow, - // so we use the current PyThreadState to check if it is necessary. The caller will see a - // null return and do the fetch from the state dict or create a new one (as needed). - auto *tstate = get_thread_state_unchecked(); - if (!tstate) { - istate_cached = nullptr; - t_internals_pp = nullptr; - } else if (tstate->interp != istate_cached) { - istate_cached = tstate->interp; - t_internals_pp = nullptr; - } - return t_internals_pp; - } -#endif - static std::unique_ptr *s_internals_pp = nullptr; - return s_internals_pp; -} - /// A string key uniquely describing this module inline char const *get_local_internals_id() { // Use the address of this static itself as part of the key, so that the value is uniquely tied @@ -582,7 +555,7 @@ inline char const *get_local_internals_id() { /// Works like `get_internals`, but for things which are locally registered. inline local_internals &get_local_internals() { - auto *&local_internals_pp = get_local_internals_pp(); + auto *&local_internals_pp = get_internals_pp(); if (local_internals_pp && *local_internals_pp) { return **local_internals_pp; } diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 3e981dc591..a456e80a6e 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -237,8 +237,8 @@ inline void finalize_interpreter() { // Get the internals pointer (without creating it if it doesn't exist). It's possible for the // internals to be created during Py_Finalize() (e.g. if a py::capsule calls `get_internals()` // during destruction), so we get the pointer-pointer here and check it after Py_Finalize(). - auto *&internals_ptr_ptr = detail::get_internals_pp(); - auto *&local_internals_ptr_ptr = detail::get_local_internals_pp(); + auto *&internals_ptr_ptr = detail::get_internals_pp(); + auto *&local_internals_ptr_ptr = detail::get_internals_pp(); { dict state_dict = detail::get_python_state_dict(); internals_ptr_ptr = detail::get_internals_pp_from_capsule_in_state_dict( diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index f27680b6b3..c515513361 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1300,13 +1300,16 @@ inline bool gil_not_used_option(F &&, O &&...o) { inline void *multi_interp_slot() { return Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED; } template inline void *multi_interp_slot(multiple_interpreters mi, O &&...o) { - if (mi.value() == multiple_interpreters::per_interpreter_gil) { - return Py_MOD_PER_INTERPRETER_GIL_SUPPORTED; - } else if (mi.value() == multiple_interpreters::shared_gil) { - return Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED; - } else { - return multi_interp_slot(o...); - } + switch (mi.value()) { + case multiple_interpreters::per_interpreter_gil: + return Py_MOD_PER_INTERPRETER_GIL_SUPPORTED; + case multiple_interpreters::shared_gil: + return Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED; + case multiple_interpreters::not_supported: + return Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED; + } + // silence warnings with this unreachable line: + return multi_interp_slot(o...); } template inline void *multi_interp_slot(F &&, O &&...o) { diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index 40da08b86f..11e8cb7845 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -261,10 +261,15 @@ bool has_state_dict_internals_obj() { } bool has_pybind11_internals_static() { - auto *&ipp = py::detail::get_internals_pp(); + auto *&ipp = py::detail::get_internals_pp(); return (ipp != nullptr) && *ipp; } +uintptr_t get_details_as_uintptr() { + return reinterpret_cast( + py::detail::get_internals_pp()->get()); +} + TEST_CASE("Restart the interpreter") { // Verify pre-restart state. REQUIRE(py::module_::import("widget_module").attr("add")(1, 2).cast() == 3); @@ -274,7 +279,7 @@ TEST_CASE("Restart the interpreter") { == 123); // local and foreign module internals should point to the same internals: - REQUIRE(reinterpret_cast(py::detail::get_internals_pp()->get()) + REQUIRE(get_details_as_uintptr() == py::module_::import("external_module").attr("internals_at")().cast()); // Restart the interpreter. @@ -290,7 +295,7 @@ TEST_CASE("Restart the interpreter") { pybind11::detail::get_internals(); REQUIRE(has_state_dict_internals_obj()); REQUIRE(has_pybind11_internals_static()); - REQUIRE(reinterpret_cast(py::detail::get_internals_pp()->get()) + REQUIRE(get_details_as_uintptr() == py::module_::import("external_module").attr("internals_at")().cast()); // Make sure that an interpreter with no get_internals() created until finalize still gets the @@ -357,7 +362,7 @@ TEST_CASE("Subinterpreter") { auto ext_int = py::module_::import("external_module").attr("internals_at")().cast(); py::detail::get_internals(); REQUIRE(has_pybind11_internals_static()); - REQUIRE(reinterpret_cast(py::detail::get_internals_pp()->get()) == ext_int); + REQUIRE(get_details_as_uintptr() == ext_int); #else // This static is still defined REQUIRE(has_pybind11_internals_static()); From 7576050ec8fe5b74c1d64cbf7260cbe5bc447433 Mon Sep 17 00:00:00 2001 From: b-pass Date: Mon, 12 May 2025 06:55:21 -0400 Subject: [PATCH 29/35] Rename curtstate to cur_tstate --- include/pybind11/detail/internals.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 2f585ce8b9..395ffbcb51 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -206,13 +206,13 @@ struct internals { type_map native_enum_type_map; internals() { - PyThreadState *curtstate = PyThreadState_Get(); + PyThreadState *cur_tstate = PyThreadState_Get(); // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) if (!PYBIND11_TLS_KEY_CREATE(tstate)) { pybind11_fail( "internals constructor: could not successfully initialize the tstate TSS key!"); } - PYBIND11_TLS_REPLACE_VALUE(tstate, curtstate); + PYBIND11_TLS_REPLACE_VALUE(tstate, cur_tstate); // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) if (!PYBIND11_TLS_KEY_CREATE(loader_life_support_tls_key)) { @@ -220,7 +220,7 @@ struct internals { "loader_life_support TSS key!"); } - istate = curtstate->interp; + istate = cur_tstate->interp; registered_exception_translators.push_front(&translate_exception); static_property_type = make_static_property_type(); default_metaclass = make_default_metaclass(); From 789d231a8b78d9b9c5b4f334054d5003edb8acd9 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Mon, 12 May 2025 22:51:14 -0400 Subject: [PATCH 30/35] refactor: use simpler names Signed-off-by: Henry Schreiner --- include/pybind11/pybind11.h | 18 ++++++++++++++---- tests/mod_per_interpreter_gil.cpp | 2 +- tests/mod_shared_interpreter_gil.cpp | 2 +- tests/test_embed/external_module.cpp | 2 +- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index c515513361..8bf2f2c466 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1269,12 +1269,22 @@ class mod_gil_not_used { class multiple_interpreters { public: - enum level { + enum class level { not_supported, /// Use to activate Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED shared_gil, /// Use to activate Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED per_interpreter_gil /// Use to activate Py_MOD_PER_INTERPRETER_GIL_SUPPORTED }; + static constexpr level not_supported() { + return level::not_supported; + } + static constexpr level shared_gil() { + return level::shared_gil; + } + static constexpr level per_interpreter_gil() { + return level::per_interpreter_gil; + } + explicit multiple_interpreters(level l) : level_(l) {} level value() const { return level_; } @@ -1301,11 +1311,11 @@ inline void *multi_interp_slot() { return Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPO template inline void *multi_interp_slot(multiple_interpreters mi, O &&...o) { switch (mi.value()) { - case multiple_interpreters::per_interpreter_gil: + case multiple_interpreters::level::per_interpreter_gil: return Py_MOD_PER_INTERPRETER_GIL_SUPPORTED; - case multiple_interpreters::shared_gil: + case multiple_interpreters::level::shared_gil: return Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED; - case multiple_interpreters::not_supported: + case multiple_interpreters::level::not_supported: return Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED; } // silence warnings with this unreachable line: diff --git a/tests/mod_per_interpreter_gil.cpp b/tests/mod_per_interpreter_gil.cpp index 7087025829..9c7bba875e 100644 --- a/tests/mod_per_interpreter_gil.cpp +++ b/tests/mod_per_interpreter_gil.cpp @@ -9,7 +9,7 @@ namespace py = pybind11; PYBIND11_MODULE(mod_per_interpreter_gil, m, py::mod_gil_not_used(), - py::multiple_interpreters(py::multiple_interpreters::per_interpreter_gil)) { + py::multiple_interpreters::per_interpreter_gil()) { m.def("internals_at", []() { return reinterpret_cast(&py::detail::get_internals()); }); } diff --git a/tests/mod_shared_interpreter_gil.cpp b/tests/mod_shared_interpreter_gil.cpp index af08800eca..7c94ad798a 100644 --- a/tests/mod_shared_interpreter_gil.cpp +++ b/tests/mod_shared_interpreter_gil.cpp @@ -8,7 +8,7 @@ namespace py = pybind11; PYBIND11_MODULE(mod_shared_interpreter_gil, m, - py::multiple_interpreters(py::multiple_interpreters::shared_gil)) { + py::multiple_interpreters::shared_gil()) { m.def("internals_at", []() { return reinterpret_cast(&py::detail::get_internals()); }); } diff --git a/tests/test_embed/external_module.cpp b/tests/test_embed/external_module.cpp index bf937586fd..3465e8b371 100644 --- a/tests/test_embed/external_module.cpp +++ b/tests/test_embed/external_module.cpp @@ -9,7 +9,7 @@ namespace py = pybind11; PYBIND11_MODULE(external_module, m, py::mod_gil_not_used(), - py::multiple_interpreters(py::multiple_interpreters::per_interpreter_gil)) { + py::multiple_interpreters::per_interpreter_gil()) { class A { public: From 0b73706ce2729b4975cc3c31d72b5fc8d562e30a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 02:51:50 +0000 Subject: [PATCH 31/35] style: pre-commit fixes --- include/pybind11/pybind11.h | 12 +++--------- tests/mod_shared_interpreter_gil.cpp | 4 +--- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 8bf2f2c466..06e42033fe 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1275,15 +1275,9 @@ class multiple_interpreters { per_interpreter_gil /// Use to activate Py_MOD_PER_INTERPRETER_GIL_SUPPORTED }; - static constexpr level not_supported() { - return level::not_supported; - } - static constexpr level shared_gil() { - return level::shared_gil; - } - static constexpr level per_interpreter_gil() { - return level::per_interpreter_gil; - } + static constexpr level not_supported() { return level::not_supported; } + static constexpr level shared_gil() { return level::shared_gil; } + static constexpr level per_interpreter_gil() { return level::per_interpreter_gil; } explicit multiple_interpreters(level l) : level_(l) {} level value() const { return level_; } diff --git a/tests/mod_shared_interpreter_gil.cpp b/tests/mod_shared_interpreter_gil.cpp index 7c94ad798a..4f5f91e293 100644 --- a/tests/mod_shared_interpreter_gil.cpp +++ b/tests/mod_shared_interpreter_gil.cpp @@ -6,9 +6,7 @@ namespace py = pybind11; * modules are different across subinterpreters */ -PYBIND11_MODULE(mod_shared_interpreter_gil, - m, - py::multiple_interpreters::shared_gil()) { +PYBIND11_MODULE(mod_shared_interpreter_gil, m, py::multiple_interpreters::shared_gil()) { m.def("internals_at", []() { return reinterpret_cast(&py::detail::get_internals()); }); } From 3036f9f1f9d35ed522f4949618f2c200a8c9b4ed Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Tue, 13 May 2025 00:40:45 -0400 Subject: [PATCH 32/35] fix: return class, not enum Co-authored-by: Ralf W. Grosse-Kunstleve Signed-off-by: Henry Schreiner --- include/pybind11/pybind11.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 06e42033fe..a1fbfa647a 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1275,11 +1275,15 @@ class multiple_interpreters { per_interpreter_gil /// Use to activate Py_MOD_PER_INTERPRETER_GIL_SUPPORTED }; - static constexpr level not_supported() { return level::not_supported; } - static constexpr level shared_gil() { return level::shared_gil; } - static constexpr level per_interpreter_gil() { return level::per_interpreter_gil; } + static multiple_interpreters not_supported() { + return multiple_interpreters(level::not_supported); + } + static multiple_interpreters shared_gil() { return multiple_interpreters(level::shared_gil); } + static multiple_interpreters per_interpreter_gil() { + return multiple_interpreters(level::per_interpreter_gil); + } - explicit multiple_interpreters(level l) : level_(l) {} + explicit constexpr multiple_interpreters(level l) : level_(l) {} level value() const { return level_; } private: From 22968e96fe1c2fb851b0050148707bc9c69ddacf Mon Sep 17 00:00:00 2001 From: b-pass Date: Tue, 13 May 2025 18:30:37 -0400 Subject: [PATCH 33/35] Have to join these threads to make sure they are totally done before the test returns. --- tests/test_embed/test_interpreter.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index 11e8cb7845..b112fd1f2b 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -548,8 +548,8 @@ TEST_CASE("Per-Subinterpreter GIL") { main_tstate); // switch back so the scoped_acquire can release the GIL properly }; - std::thread(thread_main, 1).detach(); - std::thread(thread_main, 2).detach(); + std::thread t1(thread_main, 1); + std::thread t2(thread_main, 2); // we spawned two threads, at this point they are both waiting for started to increase ++started; @@ -590,6 +590,9 @@ TEST_CASE("Per-Subinterpreter GIL") { std::this_thread::sleep_for(std::chrono::microseconds(1)); } + t1.join(); + t2.join(); + // now we have the gil again, sanity check REQUIRE(py::cast(py::module_::import("external_module").attr("multi_interp")) == "1"); From 89e9599e488f7372b0d506919040d5a4b2262751 Mon Sep 17 00:00:00 2001 From: b-pass Date: Tue, 13 May 2025 19:38:48 -0400 Subject: [PATCH 34/35] Wrap module_def initialization in a static so it only happens once. If it happens concurrently in multiple threads, badness ensues.... --- include/pybind11/detail/common.h | 24 +++++++++++++----------- tests/test_embed/test_interpreter.cpp | 16 ++++------------ 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index 2c197d1b85..e9d954bc49 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -401,17 +401,19 @@ PYBIND11_WARNING_DISABLE_CLANG("-Wgnu-zero-variadic-macro-arguments") PYBIND11_PLUGIN_IMPL(name) { \ PYBIND11_CHECK_PYTHON_VERSION \ PYBIND11_ENSURE_INTERNALS_READY \ - auto &slots = PYBIND11_CONCAT(pybind11_module_slots_, name); \ - slots[0] \ - = {Py_mod_exec, reinterpret_cast(&PYBIND11_CONCAT(pybind11_exec_, name))}; \ - slots[1] = {0, nullptr}; \ - auto m = ::pybind11::module_::initialize_multiphase_module_def( \ - PYBIND11_TOSTRING(name), \ - nullptr, \ - &PYBIND11_CONCAT(pybind11_module_def_, name), \ - slots, \ - ##__VA_ARGS__); \ - return m.ptr(); \ + static auto result = []() { \ + auto &slots = PYBIND11_CONCAT(pybind11_module_slots_, name); \ + slots[0] = {Py_mod_exec, \ + reinterpret_cast(&PYBIND11_CONCAT(pybind11_exec_, name))}; \ + slots[1] = {0, nullptr}; \ + return ::pybind11::module_::initialize_multiphase_module_def( \ + PYBIND11_TOSTRING(name), \ + nullptr, \ + &PYBIND11_CONCAT(pybind11_module_def_, name), \ + slots, \ + ##__VA_ARGS__); \ + }(); \ + return result.ptr(); \ } \ int PYBIND11_CONCAT(pybind11_exec_, name)(PyObject * pm) { \ pybind11::detail::get_num_interpreters_seen() += 1; \ diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index b112fd1f2b..cde1a3f468 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -465,10 +465,9 @@ TEST_CASE("Per-Subinterpreter GIL") { auto main_int = py::module_::import("external_module").attr("internals_at")().cast(); - std::atomic started, sync, finished, failure; + std::atomic started, sync, failure; started = 0; sync = 0; - finished = 0; failure = 0; // REQUIRE throws on failure, so we can't use it within the thread @@ -541,9 +540,7 @@ TEST_CASE("Per-Subinterpreter GIL") { T_REQUIRE(sub_int != main_int); Py_EndInterpreter(sub); - - ++finished; - + PyThreadState_Swap( main_tstate); // switch back so the scoped_acquire can release the GIL properly }; @@ -583,16 +580,11 @@ TEST_CASE("Per-Subinterpreter GIL") { // so we move sync so that thread 2 can finish executing ++sync; - ++finished; - // now wait for both threads to complete - while (finished != 3) - std::this_thread::sleep_for(std::chrono::microseconds(1)); + t1.join(); + t2.join(); } - t1.join(); - t2.join(); - // now we have the gil again, sanity check REQUIRE(py::cast(py::module_::import("external_module").attr("multi_interp")) == "1"); From 81f9ac90cb8ee0f79461158fa2257ee87fe505cf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 23:40:33 +0000 Subject: [PATCH 35/35] style: pre-commit fixes --- tests/test_embed/test_interpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index cde1a3f468..bd1632f53d 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -540,7 +540,7 @@ TEST_CASE("Per-Subinterpreter GIL") { T_REQUIRE(sub_int != main_int); Py_EndInterpreter(sub); - + PyThreadState_Swap( main_tstate); // switch back so the scoped_acquire can release the GIL properly };