Skip to content

Commit

Permalink
Simplify nb_enable_try_inc_ref.
Browse files (browse the repository at this point in the history)
We only call `nb_enable_try_inc_ref` during object construction, while we
still hold the only reference to the object. Nothing else can be modifying
`ob_ref_shared` at that point, so we can safely overwrite it with a plain
(non-atomic) store instead of using an atomic compare-exchange loop.

This speeds up creating Python object wrappers by ~5 ns on my Intel
machine. It doesn't seem to matter as much on Apple's M1 chip.
  • Loading branch information
colesbury committed Jan 17, 2025
1 parent fdafb34 commit d7b645d
Showing 1 changed file with 6 additions and 18 deletions.
24 changes: 6 additions & 18 deletions src/nb_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,22 +44,10 @@ static void nb_enable_try_inc_ref(PyObject *obj) noexcept {
#if 0 && defined(Py_GIL_DISABLED) && PY_VERSION_HEX >= 0x030E00A5
PyUnstable_EnableTryIncRef(obj);
#elif defined(Py_GIL_DISABLED)
// TODO: Replace with PyUnstable_Object_EnableTryIncRef when available.
// See https://github.com/python/cpython/issues/128844
if (_Py_IsImmortal(obj)) {
return;
}
for (;;) {
Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&obj->ob_ref_shared);
if ((shared & _Py_REF_SHARED_FLAG_MASK) != 0) {
// Nothing to do if it's in WEAKREFS, QUEUED, or MERGED states.
return;
}
if (_Py_atomic_compare_exchange_ssize(
&obj->ob_ref_shared, &shared, shared | _Py_REF_MAYBE_WEAKREF)) {
return;
}
}
// Since this is called during object construction, we know that we have
// the only reference to the object and can use a non-atomic write.
assert(obj->ob_ref_shared == 0);
obj->ob_ref_shared = _Py_REF_MAYBE_WEAKREF;
#endif
}

Expand Down Expand Up @@ -164,11 +152,11 @@ PyObject *inst_new_int(PyTypeObject *tp, PyObject * /* args */,
self->clear_keep_alive = 0;
self->intrusive = intrusive;
self->unused = 0;
nb_enable_try_inc_ref((PyObject *)self);

// Update hash table that maps from C++ to Python instance
nb_shard &shard = internals->shard((void *) payload);
lock_shard guard(shard);
nb_enable_try_inc_ref((PyObject *)self);
auto [it, success] = shard.inst_c2p.try_emplace((void *) payload, self);
check(success, "nanobind::detail::inst_new_int(): unexpected collision!");
}
Expand Down Expand Up @@ -230,12 +218,12 @@ PyObject *inst_new_ext(PyTypeObject *tp, void *value) {
self->clear_keep_alive = 0;
self->intrusive = intrusive;
self->unused = 0;
nb_enable_try_inc_ref((PyObject *)self);

nb_shard &shard = internals->shard(value);
lock_shard guard(shard);

// Update hash table that maps from C++ to Python instance
nb_enable_try_inc_ref((PyObject *)self);
auto [it, success] = shard.inst_c2p.try_emplace(value, self);

if (NB_UNLIKELY(!success)) {
Expand Down

0 comments on commit d7b645d

Please sign in to comment.