diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index ab6221aff4..e9d954bc49 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -232,6 +232,13 @@ # define PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF #endif +// Slightly faster code paths are available when PYBIND11_SUBINTERPRETER_SUPPORT is *not* defined, +// so avoid defining it for implementations that do not support subinterpreters. +// However, defining it unnecessarily is not expected to break anything. +#if PY_VERSION_HEX >= 0x030C0000 && !defined(PYPY_VERSION) && !defined(GRAALVM_PYTHON) +# define PYBIND11_SUBINTERPRETER_SUPPORT +#endif + // #define PYBIND11_STR_LEGACY_PERMISSIVE // If DEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject // (probably surprising and never documented, but this was the @@ -394,19 +401,22 @@ PYBIND11_WARNING_DISABLE_CLANG("-Wgnu-zero-variadic-macro-arguments") PYBIND11_PLUGIN_IMPL(name) { \ PYBIND11_CHECK_PYTHON_VERSION \ PYBIND11_ENSURE_INTERNALS_READY \ - auto &slots = PYBIND11_CONCAT(pybind11_module_slots_, name); \ - slots[0] \ - = {Py_mod_exec, reinterpret_cast(&PYBIND11_CONCAT(pybind11_exec_, name))}; \ - slots[1] = {0, nullptr}; \ - auto m = ::pybind11::module_::initialize_multiphase_module_def( \ - PYBIND11_TOSTRING(name), \ - nullptr, \ - &PYBIND11_CONCAT(pybind11_module_def_, name), \ - slots, \ - ##__VA_ARGS__); \ - return m.ptr(); \ + static auto result = []() { \ + auto &slots = PYBIND11_CONCAT(pybind11_module_slots_, name); \ + slots[0] = {Py_mod_exec, \ + reinterpret_cast(&PYBIND11_CONCAT(pybind11_exec_, name))}; \ + slots[1] = {0, nullptr}; \ + return ::pybind11::module_::initialize_multiphase_module_def( \ + PYBIND11_TOSTRING(name), \ + nullptr, \ + &PYBIND11_CONCAT(pybind11_module_def_, name), \ + slots, \ + ##__VA_ARGS__); \ + }(); \ + return result.ptr(); \ } \ int PYBIND11_CONCAT(pybind11_exec_, name)(PyObject * pm) { \ + pybind11::detail::get_num_interpreters_seen() += 1; \ try { \ 
auto m = pybind11::reinterpret_borrow<::pybind11::module_>(pm); \ PYBIND11_CONCAT(pybind11_init_, name)(m); \ diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 3687b48744..395ffbcb51 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -15,6 +15,7 @@ #include "common.h" +#include #include #include #include @@ -53,6 +54,7 @@ constexpr const char *internals_function_record_capsule_name = "pybind11_functio inline PyTypeObject *make_static_property_type(); inline PyTypeObject *make_default_metaclass(); inline PyObject *make_object_base_type(PyTypeObject *metaclass); +inline void translate_exception(std::exception_ptr p); // The old Python Thread Local Storage (TLS) API is deprecated in Python 3.7 in favor of the new // Thread Specific Storage (TSS) API. @@ -149,6 +151,20 @@ struct instance_map_shard { static_assert(sizeof(instance_map_shard) % 64 == 0, "instance_map_shard size is not a multiple of 64 bytes"); + +inline uint64_t round_up_to_next_pow2(uint64_t x) { + // Round-up to the next power of two. + // See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + x--; + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + x |= (x >> 32); + x++; + return x; +} #endif /// Internal data structure used to track registered instances and types. 
@@ -178,9 +194,9 @@ struct internals { // extensions std::forward_list static_strings; // Stores the std::strings backing // detail::c_str() - PyTypeObject *static_property_type; - PyTypeObject *default_metaclass; - PyObject *instance_base; + PyTypeObject *static_property_type = nullptr; + PyTypeObject *default_metaclass = nullptr; + PyObject *instance_base = nullptr; // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined: PYBIND11_TLS_KEY_INIT(tstate) PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key) @@ -189,7 +205,36 @@ struct internals { type_map native_enum_type_map; - internals() = default; + internals() { + PyThreadState *cur_tstate = PyThreadState_Get(); + // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) + if (!PYBIND11_TLS_KEY_CREATE(tstate)) { + pybind11_fail( + "internals constructor: could not successfully initialize the tstate TSS key!"); + } + PYBIND11_TLS_REPLACE_VALUE(tstate, cur_tstate); + + // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) + if (!PYBIND11_TLS_KEY_CREATE(loader_life_support_tls_key)) { + pybind11_fail("internals constructor: could not successfully initialize the " + "loader_life_support TSS key!"); + } + + istate = cur_tstate->interp; + registered_exception_translators.push_front(&translate_exception); + static_property_type = make_static_property_type(); + default_metaclass = make_default_metaclass(); +#ifdef Py_GIL_DISABLED + // Scale proportional to the number of cores. 2x is a heuristic to reduce contention. + auto num_shards + = static_cast(round_up_to_next_pow2(2 * std::thread::hardware_concurrency())); + if (num_shards == 0) { + num_shards = 1; + } + instance_shards.reset(new instance_map_shard[num_shards]); + instance_shards_mask = num_shards - 1; +#endif + } internals(const internals &other) = delete; internals &operator=(const internals &other) = delete; ~internals() { @@ -206,6 +251,17 @@ struct internals { } }; +// the internals struct (above) is shared between all the modules. 
local_internals are only +// for a single module. Any changes made to internals may require an update to +// PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design, +// restricted to a single module. Whether a module has local internals or not should not +// impact any other modules, because the only things accessing the local internals is the +// module that contains them. +struct local_internals { + type_map registered_types_cpp; + std::forward_list registered_exception_translators; +}; + enum class holder_enum_t : uint8_t { undefined, std_unique_ptr, // Default, lacking interop with std::shared_ptr. @@ -249,15 +305,49 @@ struct type_info { "__pybind11_module_local_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) \ PYBIND11_COMPILER_TYPE_LEADING_UNDERSCORE PYBIND11_PLATFORM_ABI_ID "__" -/// Each module locally stores a pointer to the `internals` data. The data -/// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`. -inline internals **&get_internals_pp() { - static internals **internals_pp = nullptr; - return internals_pp; +inline PyThreadState *get_thread_state_unchecked() { +#if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON) + return PyThreadState_GET(); +#elif PY_VERSION_HEX < 0x030D0000 + return _PyThreadState_UncheckedGet(); +#else + return PyThreadState_GetUnchecked(); +#endif } -// forward decl -inline void translate_exception(std::exception_ptr); +/// We use this counter to figure out if there are or have been multiple subinterpreters active at +/// any point. This must never decrease while any interpreter may be running in any thread! +inline std::atomic &get_num_interpreters_seen() { + static std::atomic counter(0); + return counter; +} + +template +inline std::unique_ptr *&get_internals_pp() { +#ifdef PYBIND11_SUBINTERPRETER_SUPPORT + if (get_num_interpreters_seen() > 1) { + // Internals is one per interpreter. 
When multiple interpreters are alive in different + // threads we have to allow them to have different internals, so we need a thread_local. + static thread_local std::unique_ptr *t_internals_pp = nullptr; + static thread_local PyInterpreterState *istate_cached = nullptr; + // Whenever the interpreter changes on the current thread we need to invalidate the + // internals_pp so that it can be pulled from the interpreter's state dict. That is slow, + // so we use the current PyThreadState to check if it is necessary. The caller will see a + // null return and do the fetch from the state dict or create a new one (as needed). + auto *tstate = get_thread_state_unchecked(); + if (!tstate) { + istate_cached = nullptr; + t_internals_pp = nullptr; + } else if (tstate->interp != istate_cached) { + istate_cached = tstate->interp; + t_internals_pp = nullptr; + } + return t_internals_pp; + } +#endif + static std::unique_ptr *s_internals_pp = nullptr; + return s_internals_pp; +} template >::value, int> = 0> @@ -384,49 +474,46 @@ inline object get_python_state_dict() { return state_dict; } -inline object get_internals_obj_from_state_dict(handle state_dict) { - return reinterpret_steal( - dict_getitemstringref(state_dict.ptr(), PYBIND11_INTERNALS_ID)); -} - -inline internals **get_internals_pp_from_capsule(handle obj) { - void *raw_ptr = PyCapsule_GetPointer(obj.ptr(), /*name=*/nullptr); - if (raw_ptr == nullptr) { - raise_from(PyExc_SystemError, "pybind11::detail::get_internals_pp_from_capsule() FAILED"); - throw error_already_set(); +template +inline std::unique_ptr * +get_internals_pp_from_capsule_in_state_dict(dict &state_dict, char const *state_dict_key) { + auto internals_obj + = reinterpret_steal(dict_getitemstringref(state_dict.ptr(), state_dict_key)); + if (internals_obj) { + void *raw_ptr = PyCapsule_GetPointer(internals_obj.ptr(), /*name=*/nullptr); + if (!raw_ptr) { + raise_from(PyExc_SystemError, + "pybind11::detail::get_internals_pp_from_capsule_in_state_dict() 
FAILED"); + throw error_already_set(); + } + return reinterpret_cast *>(raw_ptr); } - return static_cast(raw_ptr); -} - -inline uint64_t round_up_to_next_pow2(uint64_t x) { - // Round-up to the next power of two. - // See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - x--; - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - x |= (x >> 32); - x++; - return x; + return nullptr; } /// Return a reference to the current `internals` data PYBIND11_NOINLINE internals &get_internals() { - auto **&internals_pp = get_internals_pp(); + auto *&internals_pp = get_internals_pp(); if (internals_pp && *internals_pp) { + // This is the fast path, everything is already setup, just return it return **internals_pp; } + // Slow path, something needs fetched from the state dict or created + + // Cannot use py::gil_scoped_acquire inside get_internals since that calls get_internals. gil_scoped_acquire_simple gil; error_scope err_scope; dict state_dict = get_python_state_dict(); - if (object internals_obj = get_internals_obj_from_state_dict(state_dict)) { - internals_pp = get_internals_pp_from_capsule(internals_obj); + internals_pp = get_internals_pp_from_capsule_in_state_dict(state_dict, + PYBIND11_INTERNALS_ID); + if (!internals_pp) { + internals_pp = new std::unique_ptr; + state_dict[PYBIND11_INTERNALS_ID] = capsule(reinterpret_cast(internals_pp)); } - if (internals_pp && *internals_pp) { + + if (*internals_pp) { // We loaded the internals through `state_dict`, which means that our `error_already_set` // and `builtin_exception` may be different local classes than the ones set up in the // initial exception translator, below, so add another for our local exception classes. 
@@ -435,68 +522,61 @@ PYBIND11_NOINLINE internals &get_internals() { // libc++ with CPython doesn't require this (types are explicitly exported) // libc++ with PyPy still need it, awaiting further investigation #if !defined(__GLIBCXX__) - (*internals_pp)->registered_exception_translators.push_front(&translate_local_exception); + if ((*internals_pp)->registered_exception_translators.empty() + || (*internals_pp)->registered_exception_translators.front() + != &translate_local_exception) { + (*internals_pp) + ->registered_exception_translators.push_front(&translate_local_exception); + } #endif } else { - if (!internals_pp) { - internals_pp = new internals *(); - } - auto *&internals_ptr = *internals_pp; - internals_ptr = new internals(); + auto &internals_ptr = *internals_pp; + internals_ptr.reset(new internals()); - PyThreadState *tstate = PyThreadState_Get(); - // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) - if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->tstate)) { - pybind11_fail("get_internals: could not successfully initialize the tstate TSS key!"); - } - PYBIND11_TLS_REPLACE_VALUE(internals_ptr->tstate, tstate); - - // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) - if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->loader_life_support_tls_key)) { - pybind11_fail("get_internals: could not successfully initialize the " - "loader_life_support TSS key!"); - } - - internals_ptr->istate = tstate->interp; - state_dict[PYBIND11_INTERNALS_ID] = capsule(reinterpret_cast(internals_pp)); - internals_ptr->registered_exception_translators.push_front(&translate_exception); - internals_ptr->static_property_type = make_static_property_type(); - internals_ptr->default_metaclass = make_default_metaclass(); - internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass); -#ifdef Py_GIL_DISABLED - // Scale proportional to the number of cores. 2x is a heuristic to reduce contention. 
- auto num_shards - = static_cast(round_up_to_next_pow2(2 * std::thread::hardware_concurrency())); - if (num_shards == 0) { - num_shards = 1; + if (!internals_ptr->instance_base) { + // This calls get_internals, so cannot be called from within the internals constructor + // called above because internals_ptr must be set before get_internals is called again + internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass); } - internals_ptr->instance_shards.reset(new instance_map_shard[num_shards]); - internals_ptr->instance_shards_mask = num_shards - 1; -#endif // Py_GIL_DISABLED } + return **internals_pp; } -// the internals struct (above) is shared between all the modules. local_internals are only -// for a single module. Any changes made to internals may require an update to -// PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design, -// restricted to a single module. Whether a module has local internals or not should not -// impact any other modules, because the only things accessing the local internals is the -// module that contains them. -struct local_internals { - type_map registered_types_cpp; - std::forward_list registered_exception_translators; -}; +/// A string key uniquely describing this module +inline char const *get_local_internals_id() { + // Use the address of this static itself as part of the key, so that the value is uniquely tied + // to where the module is loaded in memory + static const std::string this_module_idstr + = PYBIND11_MODULE_LOCAL_ID + + std::to_string(reinterpret_cast(&this_module_idstr)); + return this_module_idstr.c_str(); +} /// Works like `get_internals`, but for things which are locally registered. inline local_internals &get_local_internals() { - // Current static can be created in the interpreter finalization routine. 
If the later will be - // destroyed in another static variable destructor, creation of this static there will cause - // static deinitialization fiasco. In order to avoid it we avoid destruction of the - // local_internals static. One can read more about the problem and current solution here: - // https://google.github.io/styleguide/cppguide.html#Static_and_Global_Variables - static auto *locals = new local_internals(); - return *locals; + auto *&local_internals_pp = get_internals_pp(); + if (local_internals_pp && *local_internals_pp) { + return **local_internals_pp; + } + + // Cannot use py::gil_scoped_acquire inside get_internals since that calls get_internals. + gil_scoped_acquire_simple gil; + error_scope err_scope; + + dict state_dict = get_python_state_dict(); + local_internals_pp = get_internals_pp_from_capsule_in_state_dict( + state_dict, get_local_internals_id()); + if (!local_internals_pp) { + local_internals_pp = new std::unique_ptr; + state_dict[get_local_internals_id()] + = capsule(reinterpret_cast(local_internals_pp)); + } + if (!*local_internals_pp) { + local_internals_pp->reset(new local_internals()); + } + + return **local_internals_pp; } #ifdef Py_GIL_DISABLED diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h index ceef18cb4a..9734d92181 100644 --- a/include/pybind11/detail/type_caster_base.h +++ b/include/pybind11/detail/type_caster_base.h @@ -497,16 +497,6 @@ PYBIND11_NOINLINE handle get_object_handle(const void *ptr, const detail::type_i }); } -inline PyThreadState *get_thread_state_unchecked() { -#if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON) - return PyThreadState_GET(); -#elif PY_VERSION_HEX < 0x030D0000 - return _PyThreadState_UncheckedGet(); -#else - return PyThreadState_GetUnchecked(); -#endif -} - // Forward declarations void keep_alive_impl(handle nurse, handle patient); inline PyObject *make_new_instance(PyTypeObject *type); diff --git a/include/pybind11/embed.h 
b/include/pybind11/embed.h index 1b95c32a7f..a456e80a6e 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -193,6 +193,9 @@ inline void initialize_interpreter(bool init_signal_handlers = true, config.install_signal_handlers = init_signal_handlers ? 1 : 0; initialize_interpreter(&config, argc, argv, add_program_dir_to_path); #endif + + // There is exactly one interpreter alive currently. + detail::get_num_interpreters_seen() = 1; } /** \rst @@ -234,23 +237,31 @@ inline void finalize_interpreter() { // Get the internals pointer (without creating it if it doesn't exist). It's possible for the // internals to be created during Py_Finalize() (e.g. if a py::capsule calls `get_internals()` // during destruction), so we get the pointer-pointer here and check it after Py_Finalize(). - detail::internals **internals_ptr_ptr = detail::get_internals_pp(); - // It could also be stashed in state_dict, so look there too: - if (object internals_obj - = get_internals_obj_from_state_dict(detail::get_python_state_dict())) { - internals_ptr_ptr = detail::get_internals_pp_from_capsule(internals_obj); + auto *&internals_ptr_ptr = detail::get_internals_pp(); + auto *&local_internals_ptr_ptr = detail::get_internals_pp(); + { + dict state_dict = detail::get_python_state_dict(); + internals_ptr_ptr = detail::get_internals_pp_from_capsule_in_state_dict( + state_dict, PYBIND11_INTERNALS_ID); + local_internals_ptr_ptr + = detail::get_internals_pp_from_capsule_in_state_dict( + state_dict, detail::get_local_internals_id()); } - // Local internals contains data managed by the current interpreter, so we must clear them to - // avoid undefined behaviors when initializing another interpreter - detail::get_local_internals().registered_types_cpp.clear(); - detail::get_local_internals().registered_exception_translators.clear(); Py_Finalize(); if (internals_ptr_ptr) { - delete *internals_ptr_ptr; - *internals_ptr_ptr = nullptr; + internals_ptr_ptr->reset(); + } + + // Local internals 
contains data managed by the current interpreter, so we must clear them to + // avoid undefined behaviors when initializing another interpreter + if (local_internals_ptr_ptr) { + local_internals_ptr_ptr->reset(); } + + // We know there is no interpreter alive now, so we can reset the count + detail::get_num_interpreters_seen() = 0; } /** \rst diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 6854d89ed6..7108343f6f 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1267,6 +1267,29 @@ class mod_gil_not_used { bool flag_; }; +class multiple_interpreters { +public: + enum class level { + not_supported, /// Use to activate Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED + shared_gil, /// Use to activate Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED + per_interpreter_gil /// Use to activate Py_MOD_PER_INTERPRETER_GIL_SUPPORTED + }; + + static multiple_interpreters not_supported() { + return multiple_interpreters(level::not_supported); + } + static multiple_interpreters shared_gil() { return multiple_interpreters(level::shared_gil); } + static multiple_interpreters per_interpreter_gil() { + return multiple_interpreters(level::per_interpreter_gil); + } + + explicit constexpr multiple_interpreters(level l) : level_(l) {} + level value() const { return level_; } + +private: + level level_; +}; + PYBIND11_NAMESPACE_BEGIN(detail) inline bool gil_not_used_option() { return false; } @@ -1281,6 +1304,27 @@ inline bool gil_not_used_option(F &&, O &&...o) { return gil_not_used_option(o...); } +#ifdef Py_mod_multiple_interpreters +inline void *multi_interp_slot() { return Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED; } +template +inline void *multi_interp_slot(multiple_interpreters mi, O &&...o) { + switch (mi.value()) { + case multiple_interpreters::level::per_interpreter_gil: + return Py_MOD_PER_INTERPRETER_GIL_SUPPORTED; + case multiple_interpreters::level::shared_gil: + return Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED; + case 
multiple_interpreters::level::not_supported: + return Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED; + } + // silence warnings with this unreachable line: + return multi_interp_slot(o...); +} +template +inline void *multi_interp_slot(F &&, O &&...o) { + return multi_interp_slot(o...); +} +#endif + PYBIND11_NAMESPACE_END(detail) /// Wrapper for Python extension modules @@ -1437,7 +1481,7 @@ class module_ : public object { /// Must be a POD type, and must hold enough entries for all of the possible slots PLUS ONE for /// the sentinel (0) end slot. - using slots_array = std::array; + using slots_array = std::array; /** \rst Initialized a module def for use with multi-phase module initialization. @@ -1460,8 +1504,14 @@ class module_ : public object { ++next_slot; } - bool nogil PYBIND11_MAYBE_UNUSED = detail::gil_not_used_option(options...); - if (nogil) { +#ifdef Py_mod_multiple_interpreters + if (next_slot >= term_slot) { + pybind11_fail("initialize_multiphase_module_def: not enough space in slots"); + } + slots[next_slot++] = {Py_mod_multiple_interpreters, detail::multi_interp_slot(options...)}; +#endif + + if (detail::gil_not_used_option(options...)) { #if defined(Py_mod_gil) && defined(Py_GIL_DISABLED) if (next_slot >= term_slot) { pybind11_fail("initialize_multiphase_module_def: not enough space in slots"); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 29172b6ce6..374a138865 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -154,6 +154,7 @@ set(PYBIND11_TEST_FILES test_methods_and_attributes test_modules test_multiple_inheritance + test_multiple_interpreters.py test_native_enum test_numpy_array test_numpy_dtypes @@ -563,6 +564,17 @@ add_custom_target( WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" USES_TERMINAL) +if(NOT PYBIND11_CUDA_TESTS) + # This module doesn't get mixed with other test modules because those aren't subinterpreter safe. 
+ pybind11_add_module(mod_per_interpreter_gil THIN_LTO mod_per_interpreter_gil.cpp) + pybind11_add_module(mod_shared_interpreter_gil THIN_LTO mod_shared_interpreter_gil.cpp) + set_target_properties(mod_per_interpreter_gil PROPERTIES LIBRARY_OUTPUT_DIRECTORY + "$<1:${CMAKE_CURRENT_BINARY_DIR}>") + set_target_properties(mod_shared_interpreter_gil PROPERTIES LIBRARY_OUTPUT_DIRECTORY + "$<1:${CMAKE_CURRENT_BINARY_DIR}>") + add_dependencies(pytest mod_per_interpreter_gil mod_shared_interpreter_gil) +endif() + if(PYBIND11_TEST_OVERRIDE) add_custom_command( TARGET pytest diff --git a/tests/mod_per_interpreter_gil.cpp b/tests/mod_per_interpreter_gil.cpp new file mode 100644 index 0000000000..9c7bba875e --- /dev/null +++ b/tests/mod_per_interpreter_gil.cpp @@ -0,0 +1,15 @@ +#include + +namespace py = pybind11; + +/* Simple test module/test class to check that the referenced internals data of external pybind11 + * modules are different across subinterpreters + */ + +PYBIND11_MODULE(mod_per_interpreter_gil, + m, + py::mod_gil_not_used(), + py::multiple_interpreters::per_interpreter_gil()) { + m.def("internals_at", + []() { return reinterpret_cast(&py::detail::get_internals()); }); +} diff --git a/tests/mod_shared_interpreter_gil.cpp b/tests/mod_shared_interpreter_gil.cpp new file mode 100644 index 0000000000..4f5f91e293 --- /dev/null +++ b/tests/mod_shared_interpreter_gil.cpp @@ -0,0 +1,12 @@ +#include + +namespace py = pybind11; + +/* Simple test module/test class to check that the referenced internals data of external pybind11 + * modules are different across subinterpreters + */ + +PYBIND11_MODULE(mod_shared_interpreter_gil, m, py::multiple_interpreters::shared_gil()) { + m.def("internals_at", + []() { return reinterpret_cast(&py::detail::get_internals()); }); +} diff --git a/tests/test_embed/external_module.cpp b/tests/test_embed/external_module.cpp index 6564ecbef5..3465e8b371 100644 --- a/tests/test_embed/external_module.cpp +++ b/tests/test_embed/external_module.cpp @@ 
-6,7 +6,11 @@ namespace py = pybind11; * modules aren't preserved over a finalize/initialize. */ -PYBIND11_MODULE(external_module, m, py::mod_gil_not_used()) { +PYBIND11_MODULE(external_module, + m, + py::mod_gil_not_used(), + py::multiple_interpreters::per_interpreter_gil()) { + class A { public: explicit A(int value) : v{value} {}; diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index 56496212fd..cf9f86b521 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -259,13 +259,18 @@ TEST_CASE("Add program dir to path using PyConfig") { #endif bool has_state_dict_internals_obj() { - return bool( - py::detail::get_internals_obj_from_state_dict(py::detail::get_python_state_dict())); + py::dict state = py::detail::get_python_state_dict(); + return state.contains(PYBIND11_INTERNALS_ID); } bool has_pybind11_internals_static() { - auto **&ipp = py::detail::get_internals_pp(); - return (ipp != nullptr) && (*ipp != nullptr); + auto *&ipp = py::detail::get_internals_pp(); + return (ipp != nullptr) && *ipp; +} + +uintptr_t get_details_as_uintptr() { + return reinterpret_cast( + py::detail::get_internals_pp()->get()); } TEST_CASE("Restart the interpreter") { @@ -277,7 +282,7 @@ TEST_CASE("Restart the interpreter") { == 123); // local and foreign module internals should point to the same internals: - REQUIRE(reinterpret_cast(*py::detail::get_internals_pp()) + REQUIRE(get_details_as_uintptr() == py::module_::import("external_module").attr("internals_at")().cast()); // Restart the interpreter. 
@@ -293,7 +298,7 @@ TEST_CASE("Restart the interpreter") { pybind11::detail::get_internals(); REQUIRE(has_state_dict_internals_obj()); REQUIRE(has_pybind11_internals_static()); - REQUIRE(reinterpret_cast(*py::detail::get_internals_pp()) + REQUIRE(get_details_as_uintptr() == py::module_::import("external_module").attr("internals_at")().cast()); // Make sure that an interpreter with no get_internals() created until finalize still gets the @@ -304,6 +309,8 @@ TEST_CASE("Restart the interpreter") { py::module_::import("__main__").attr("internals_destroy_test") = py::capsule(&ran, [](void *ran) { py::detail::get_internals(); + REQUIRE(has_state_dict_internals_obj()); + REQUIRE(has_pybind11_internals_static()); *static_cast(ran) = true; }); REQUIRE_FALSE(has_state_dict_internals_obj()); @@ -311,6 +318,7 @@ TEST_CASE("Restart the interpreter") { REQUIRE_FALSE(ran); py::finalize_interpreter(); REQUIRE(ran); + REQUIRE_FALSE(has_pybind11_internals_static()); py::initialize_interpreter(); REQUIRE_FALSE(has_state_dict_internals_obj()); REQUIRE_FALSE(has_pybind11_internals_static()); @@ -329,6 +337,8 @@ TEST_CASE("Restart the interpreter") { } TEST_CASE("Subinterpreter") { + py::module_::import("external_module"); // in the main interpreter + // Add tags to the modules in the main interpreter and test the basics. py::module_::import("__main__").attr("main_tag") = "main interpreter"; { @@ -344,11 +354,25 @@ TEST_CASE("Subinterpreter") { auto *main_tstate = PyThreadState_Get(); auto *sub_tstate = Py_NewInterpreter(); - // Subinterpreters get their own copy of builtins. detail::get_internals() still - // works by returning from the static variable, i.e. all interpreters share a single - // global pybind11::internals; + py::detail::get_num_interpreters_seen()++; + + // Subinterpreters get their own copy of builtins. 
REQUIRE_FALSE(has_state_dict_internals_obj()); + +#if defined(PYBIND11_SUBINTERPRETER_SUPPORT) && PY_VERSION_HEX >= 0x030C0000 + // internals hasn't been populated yet, but will be different for the subinterpreter + REQUIRE_FALSE(has_pybind11_internals_static()); + + py::list(py::module_::import("sys").attr("path")).append(py::str(".")); + + auto ext_int = py::module_::import("external_module").attr("internals_at")().cast(); + py::detail::get_internals(); + REQUIRE(has_pybind11_internals_static()); + REQUIRE(get_details_as_uintptr() == ext_int); +#else + // This static is still defined REQUIRE(has_pybind11_internals_static()); +#endif // Modules tags should be gone. REQUIRE_FALSE(py::hasattr(py::module_::import("__main__"), "tag")); @@ -360,13 +384,225 @@ TEST_CASE("Subinterpreter") { REQUIRE(m.attr("add")(1, 2).cast() == 3); } + // The subinterpreter now has internals populated since we imported a pybind11 module + REQUIRE(has_pybind11_internals_static()); + // Restore main interpreter. Py_EndInterpreter(sub_tstate); + py::detail::get_num_interpreters_seen() = 1; PyThreadState_Swap(main_tstate); REQUIRE(py::hasattr(py::module_::import("__main__"), "main_tag")); REQUIRE(py::hasattr(py::module_::import("widget_module"), "extension_module_tag")); + REQUIRE(has_state_dict_internals_obj()); +} + +#if defined(PYBIND11_SUBINTERPRETER_SUPPORT) +TEST_CASE("Multiple Subinterpreters") { + // Make sure the module is in the main interpreter and save its pointer + auto *main_ext = py::module_::import("external_module").ptr(); + auto main_int + = py::module_::import("external_module").attr("internals_at")().cast(); + py::module_::import("external_module").attr("multi_interp") = "1"; + + auto *main_tstate = PyThreadState_Get(); + + /// Create and switch to a subinterpreter. 
+ auto *sub1_tstate = Py_NewInterpreter(); + py::detail::get_num_interpreters_seen()++; + + py::list(py::module_::import("sys").attr("path")).append(py::str(".")); + + // The subinterpreter has its own copy of this module which is completely separate from main + auto *sub1_ext = py::module_::import("external_module").ptr(); + REQUIRE(sub1_ext != main_ext); + REQUIRE_FALSE(py::hasattr(py::module_::import("external_module"), "multi_interp")); + py::module_::import("external_module").attr("multi_interp") = "2"; + // The subinterpreter also has its own internals + auto sub1_int + = py::module_::import("external_module").attr("internals_at")().cast(); + REQUIRE(sub1_int != main_int); + + // Create another interpreter + auto *sub2_tstate = Py_NewInterpreter(); + py::detail::get_num_interpreters_seen()++; + + py::list(py::module_::import("sys").attr("path")).append(py::str(".")); + + // The second subinterpreter is separate from both main and the other subinterpreter + auto *sub2_ext = py::module_::import("external_module").ptr(); + REQUIRE(sub2_ext != main_ext); + REQUIRE(sub2_ext != sub1_ext); + REQUIRE_FALSE(py::hasattr(py::module_::import("external_module"), "multi_interp")); + py::module_::import("external_module").attr("multi_interp") = "3"; + // The subinterpreter also has its own internals + auto sub2_int + = py::module_::import("external_module").attr("internals_at")().cast(); + REQUIRE(sub2_int != main_int); + REQUIRE(sub2_int != sub1_int); + + PyThreadState_Swap(sub1_tstate); // go back to sub1 + + REQUIRE(py::cast(py::module_::import("external_module").attr("multi_interp")) + == "2"); + + PyThreadState_Swap(main_tstate); // go back to main + + auto post_int + = py::module_::import("external_module").attr("internals_at")().cast(); + // Make sure internals went back the way it was before + REQUIRE(main_int == post_int); + + REQUIRE(py::cast(py::module_::import("external_module").attr("multi_interp")) + == "1"); + + PyThreadState_Swap(sub1_tstate); + 
Py_EndInterpreter(sub1_tstate);
+    PyThreadState_Swap(sub2_tstate);
+    Py_EndInterpreter(sub2_tstate);
+
+    py::detail::get_num_interpreters_seen() = 1;
+    PyThreadState_Swap(main_tstate);
 }
+#endif
+
+#if defined(Py_MOD_PER_INTERPRETER_GIL_SUPPORTED) && defined(PYBIND11_SUBINTERPRETER_SUPPORT)
+// Two worker threads each create an own-GIL subinterpreter (PyInterpreterConfig_OWN_GIL) and
+// hand off through the `sync` counter while still inside it. The test passes only if that
+// handoff completes (no hang) and no T_REQUIRE check inside the threads bumped `failure`.
+TEST_CASE("Per-Subinterpreter GIL") {
+    auto main_int
+        = py::module_::import("external_module").attr("internals_at")().cast<uintptr_t>();
+
+    std::atomic<int> started{0}, sync{0}, failure{0};
+
+// REQUIRE throws on failure, so we can't use it within the thread
+#    define T_REQUIRE(status)                                                                     \
+        do {                                                                                      \
+            assert(status);                                                                       \
+            if (!(status))                                                                        \
+                ++failure;                                                                        \
+        } while (0)
+
+    auto &&thread_main = [&](int num) {
+        while (started == 0)
+            std::this_thread::sleep_for(std::chrono::microseconds(1));
+        ++started;
+
+        py::gil_scoped_acquire gil;
+        auto main_tstate = PyThreadState_Get();
+
+        // we have the GIL, we can access the main interpreter
+        auto t_int
+            = py::module_::import("external_module").attr("internals_at")().cast<uintptr_t>();
+        T_REQUIRE(t_int == main_int);
+        py::module_::import("external_module").attr("multi_interp") = "1";
+
+        PyThreadState *sub = nullptr;
+        PyInterpreterConfig cfg;
+        memset(&cfg, 0, sizeof(cfg));
+        cfg.check_multi_interp_extensions = 1;
+        cfg.gil = PyInterpreterConfig_OWN_GIL;
+        auto status = Py_NewInterpreterFromConfig(&sub, &cfg);
+        T_REQUIRE(!PyStatus_IsError(status));
+
+        py::detail::get_num_interpreters_seen()++;
+
+        py::list(py::module_::import("sys").attr("path")).append(py::str("."));
+
+        // we have switched to the new interpreter and released the main gil
+
+        // widget_module did not provide the mod_per_interpreter_gil tag, so it cannot be imported
+        bool caught = false;
+        try {
+            py::module_::import("widget_module");
+        } catch (const pybind11::error_already_set &pe) {
+            T_REQUIRE(pe.matches(PyExc_ImportError));
+            std::string msg(pe.what());
+            T_REQUIRE(msg.find("does not support loading in subinterpreters")
+                      != std::string::npos);
+            caught = true;
+        }
+        T_REQUIRE(caught);
+
+        T_REQUIRE(!py::hasattr(py::module_::import("external_module"), "multi_interp"));
+        py::module_::import("external_module").attr("multi_interp") = std::to_string(num);
+
+        // wait for something to set sync to our thread number
+        // we are holding our subinterpreter's GIL
+        while (sync != num)
+            std::this_thread::sleep_for(std::chrono::microseconds(1));
+
+        // now change it so the next thread can move on
+        ++sync;
+
+        // but keep holding the GIL until after the next thread moves on as well
+        while (sync == num + 1)
+            std::this_thread::sleep_for(std::chrono::microseconds(1));
+
+        // one last check before quitting the thread, the internals should be different
+        auto sub_int
+            = py::module_::import("external_module").attr("internals_at")().cast<uintptr_t>();
+        T_REQUIRE(sub_int != main_int);
+
+        Py_EndInterpreter(sub);
+
+        PyThreadState_Swap(
+            main_tstate); // switch back so the scoped_acquire can release the GIL properly
+    };
+
+    std::thread t1(thread_main, 1);
+    std::thread t2(thread_main, 2);
+
+    // we spawned two threads, at this point they are both waiting for started to increase
+    ++started;
+
+    // ok now wait for the threads to start
+    while (started != 3)
+        std::this_thread::sleep_for(std::chrono::microseconds(1));
+
+    // we still hold the main GIL, at this point both threads are waiting on the main GIL
+    // IN THE CASE of free threading, the threads are waiting on sync (because there is no GIL)
+
+    // IF the below code hangs in one of the wait loops, then the child thread GIL behavior did not
+    // function as expected.
+    {
+        // release the GIL and allow the threads to run
+        py::gil_scoped_release nogil;
+
+        // the threads are now waiting on the sync
+        REQUIRE(sync == 0);
+
+        // this will trigger thread 1 and then advance and trigger 2 and then advance
+        sync = 1;
+
+        // wait for thread 2 to advance
+        while (sync != 3)
+            std::this_thread::sleep_for(std::chrono::microseconds(1));
+
+        // we know now that thread 1 has run and may be finishing
+        // and thread 2 is waiting for permission to advance
+
+        // so we move sync so that thread 2 can finish executing
+        ++sync;
+
+        // now wait for both threads to complete
+        t1.join();
+        t2.join();
+    }
+
+    // now we have the gil again, sanity check
+    REQUIRE(py::cast<std::string>(py::module_::import("external_module").attr("multi_interp"))
+            == "1");
+
+    // the threads are stopped. we can now lower this for the rest of the test
+    py::detail::get_num_interpreters_seen() = 1;
+
+    // make sure nothing unexpected happened inside the threads, now that they are completed
+    REQUIRE(failure == 0);
+#    undef T_REQUIRE
+}
+#endif
 
 TEST_CASE("Execution frame") {
     // When the interpreter is embedded, there is no execution frame, but `py::exec`
diff --git a/tests/test_multiple_interpreters.py b/tests/test_multiple_interpreters.py
new file mode 100644
index 0000000000..d7321171bd
--- /dev/null
+++ b/tests/test_multiple_interpreters.py
@@ -0,0 +1,134 @@
+from __future__ import annotations
+
+import os
+import pickle
+import sys
+
+import pytest
+
+
+def _import_interpreters():
+    """Return the low-level interpreters module for this Python, or skip the test."""
+    # The tests call module-level create/run_string/destroy, which the private
+    # _interpreters (3.13+) and _xxsubinterpreters (3.12) modules provide.
+    if sys.version_info >= (3, 13):
+        import _interpreters as interpreters
+    elif sys.version_info >= (3, 12):
+        import _xxsubinterpreters as interpreters
+    else:
+        pytest.skip("Test requires the interpreters stdlib module")
+    return interpreters
+
+
+@pytest.mark.skipif(
+    sys.platform.startswith("emscripten"), reason="Requires loadable modules"
+)
+def test_independent_subinterpreters():
+    """Makes sure the internals object differs across independent subinterpreters"""
+
+    sys.path.append(".")
+
+    interpreters = _import_interpreters()
+
+    import mod_per_interpreter_gil as m
+
+    code = """
+import mod_per_interpreter_gil as m
+import pickle
+with open(pipeo, 'wb') as f:
+    pickle.dump(m.internals_at(), f)
+"""
+
+    interp1 = interpreters.create()
+    interp2 = interpreters.create()
+    try:
+        # The import is expected to fail; run_string reports that either by raising or by
+        # returning an object with a ``msg`` attribute (None means the import succeeded).
+        try:
+            err = interpreters.run_string(interp1, "import mod_shared_interpreter_gil")
+            res0 = "" if err is None else err.msg
+        except Exception as e:
+            res0 = str(e)
+
+        pipei, pipeo = os.pipe()
+        interpreters.run_string(interp1, code, shared={"pipeo": pipeo})
+        with open(pipei, "rb") as f:
+            res1 = pickle.load(f)
+
+        pipei, pipeo = os.pipe()
+        interpreters.run_string(interp2, code, shared={"pipeo": pipeo})
+        with open(pipei, "rb") as f:
+            res2 = pickle.load(f)
+
+        # do this while the two interpreters are active
+        import mod_per_interpreter_gil as m2
+
+        assert m.internals_at() == m2.internals_at(), (
+            "internals should be the same within the main interpreter"
+        )
+    finally:
+        interpreters.destroy(interp1)
+        interpreters.destroy(interp2)
+
+    assert "does not support loading in subinterpreters" in res0, (
+        "cannot use shared_gil in a default subinterpreter"
+    )
+    assert res1 != m.internals_at(), "internals should differ from main interpreter"
+    assert res2 != m.internals_at(), "internals should differ from main interpreter"
+    assert res1 != res2, "internals should differ between interpreters"
+
+    # do this after the two interpreters are destroyed and only one remains
+    import mod_per_interpreter_gil as m3
+
+    assert m.internals_at() == m3.internals_at(), (
+        "internals should be the same within the main interpreter"
+    )
+
+
+@pytest.mark.skipif(
+    sys.platform.startswith("emscripten"), reason="Requires loadable modules"
+)
+def test_dependent_subinterpreters():
+    """Makes sure the internals object differs across subinterpreters"""
+
+    sys.path.append(".")
+
+    interpreters = _import_interpreters()
+
+    import mod_shared_interpreter_gil as m
+
+    code = """
+import mod_shared_interpreter_gil as m
+import pickle
+with open(pipeo, 'wb') as f:
+    pickle.dump(m.internals_at(), f)
+"""
+
+    try:
+        interp1 = interpreters.create("legacy")
+    except TypeError:
+        pytest.skip("interpreters module needs to support legacy config")
+
+    try:
+        pipei, pipeo = os.pipe()
+        interpreters.run_string(interp1, code, shared={"pipeo": pipeo})
+        with open(pipei, "rb") as f:
+            res1 = pickle.load(f)
+
+        # do this while the other interpreter is active
+        import mod_shared_interpreter_gil as m2
+
+        assert m.internals_at() == m2.internals_at(), (
+            "internals should be the same within the main interpreter"
+        )
+    finally:
+        interpreters.destroy(interp1)
+
+    assert res1 != m.internals_at(), "internals should differ from main interpreter"
+
+    # do this after the other interpreters are destroyed and only one remains
+    import mod_shared_interpreter_gil as m3
+
+    assert m.internals_at() == m3.internals_at(), (
+        "internals should be the same within the main interpreter"
+    )