Skip to content

Commit 0855146

Browse files
Plug leaking function_records in cpp_function initialization in case of exceptions (found by Valgrind in #2746) (#2756)
* Plug leaking function_record objects when exceptions are thrown * Plug leak of strdup'ed strings in function_record * Some extra comments about the function_record ownership dance * Clean up the function_record better, in case of exceptions * Demonstrate some extra function_record leaks * Change DeleteStrings template argument to free_strings runtime argument in destruct(function_record *) * Zero-state unique_function_record deleter object * Clarify rvalue reference to unique_ptr parameter in initialize_generic * Use push_back with const char * instead of emplace_back
1 parent 230fa53 commit 0855146

File tree

3 files changed

+100
-24
lines changed

3 files changed

+100
-24
lines changed

include/pybind11/pybind11.h

+74-24
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,16 @@ class cpp_function : public function {
114114
object name() const { return attr("__name__"); }
115115

116116
protected:
117+
struct InitializingFunctionRecordDeleter {
118+
// `destruct(function_record, false)`: `initialize_generic` copies strings and
119+
// takes care of cleaning up in case of exceptions. So pass `false` to `free_strings`.
120+
void operator()(detail::function_record * rec) { destruct(rec, false); }
121+
};
122+
using unique_function_record = std::unique_ptr<detail::function_record, InitializingFunctionRecordDeleter>;
123+
117124
/// Space optimization: don't inline this frequently instantiated fragment
118-
PYBIND11_NOINLINE detail::function_record *make_function_record() {
119-
return new detail::function_record();
125+
PYBIND11_NOINLINE unique_function_record make_function_record() {
126+
return unique_function_record(new detail::function_record());
120127
}
121128

122129
/// Special internal constructor for functors, lambda functions, etc.
@@ -126,7 +133,9 @@ class cpp_function : public function {
126133
struct capture { remove_reference_t<Func> f; };
127134

128135
/* Store the function including any extra state it might have (e.g. a lambda capture object) */
129-
auto rec = make_function_record();
136+
// The unique_ptr makes sure nothing is leaked in case of an exception.
137+
auto unique_rec = make_function_record();
138+
auto rec = unique_rec.get();
130139

131140
/* Store the capture object directly in the function record if there is enough space */
132141
if (sizeof(capture) <= sizeof(rec->data)) {
@@ -207,7 +216,8 @@ class cpp_function : public function {
207216
PYBIND11_DESCR_CONSTEXPR auto types = decltype(signature)::types();
208217

209218
/* Register the function with Python from generic (non-templated) code */
210-
initialize_generic(rec, signature.text, types.data(), sizeof...(Args));
219+
// Pass on the ownership over the `unique_rec` to `initialize_generic`. `rec` stays valid.
220+
initialize_generic(std::move(unique_rec), signature.text, types.data(), sizeof...(Args));
211221

212222
if (cast_in::has_args) rec->has_args = true;
213223
if (cast_in::has_kwargs) rec->has_kwargs = true;
@@ -223,20 +233,51 @@ class cpp_function : public function {
223233
}
224234
}
225235

236+
// Utility class that keeps track of all duplicated strings, and cleans them up in its destructor,
237+
// unless they are released. Basically a RAII-solution to deal with exceptions along the way.
238+
class strdup_guard {
239+
public:
240+
~strdup_guard() {
241+
for (auto s : strings)
242+
std::free(s);
243+
}
244+
char *operator()(const char *s) {
245+
auto t = strdup(s);
246+
strings.push_back(t);
247+
return t;
248+
}
249+
void release() {
250+
strings.clear();
251+
}
252+
private:
253+
std::vector<char *> strings;
254+
};
255+
226256
/// Register a function call with Python (generic non-templated code goes here)
227-
void initialize_generic(detail::function_record *rec, const char *text,
257+
void initialize_generic(unique_function_record &&unique_rec, const char *text,
228258
const std::type_info *const *types, size_t args) {
259+
// Do NOT receive `unique_rec` by value. If this function fails to move out the unique_ptr,
260+
// we do not want this to destuct the pointer. `initialize` (the caller) still relies on the
261+
// pointee being alive after this call. Only move out if a `capsule` is going to keep it alive.
262+
auto rec = unique_rec.get();
263+
264+
// Keep track of strdup'ed strings, and clean them up as long as the function's capsule
265+
// has not taken ownership yet (when `unique_rec.release()` is called).
266+
// Note: This cannot easily be fixed by a `unique_ptr` with custom deleter, because the strings
267+
// are only referenced before strdup'ing. So only *after* the following block could `destruct`
268+
// safely be called, but even then, `repr` could still throw in the middle of copying all strings.
269+
strdup_guard guarded_strdup;
229270

230271
/* Create copies of all referenced C-style strings */
231-
rec->name = strdup(rec->name ? rec->name : "");
232-
if (rec->doc) rec->doc = strdup(rec->doc);
272+
rec->name = guarded_strdup(rec->name ? rec->name : "");
273+
if (rec->doc) rec->doc = guarded_strdup(rec->doc);
233274
for (auto &a: rec->args) {
234275
if (a.name)
235-
a.name = strdup(a.name);
276+
a.name = guarded_strdup(a.name);
236277
if (a.descr)
237-
a.descr = strdup(a.descr);
278+
a.descr = guarded_strdup(a.descr);
238279
else if (a.value)
239-
a.descr = strdup(repr(a.value).cast<std::string>().c_str());
280+
a.descr = guarded_strdup(repr(a.value).cast<std::string>().c_str());
240281
}
241282

242283
rec->is_constructor = !strcmp(rec->name, "__init__") || !strcmp(rec->name, "__setstate__");
@@ -319,13 +360,13 @@ class cpp_function : public function {
319360
#if PY_MAJOR_VERSION < 3
320361
if (strcmp(rec->name, "__next__") == 0) {
321362
std::free(rec->name);
322-
rec->name = strdup("next");
363+
rec->name = guarded_strdup("next");
323364
} else if (strcmp(rec->name, "__bool__") == 0) {
324365
std::free(rec->name);
325-
rec->name = strdup("__nonzero__");
366+
rec->name = guarded_strdup("__nonzero__");
326367
}
327368
#endif
328-
rec->signature = strdup(signature.c_str());
369+
rec->signature = guarded_strdup(signature.c_str());
329370
rec->args.shrink_to_fit();
330371
rec->nargs = (std::uint16_t) args;
331372

@@ -356,9 +397,10 @@ class cpp_function : public function {
356397
rec->def->ml_meth = reinterpret_cast<PyCFunction>(reinterpret_cast<void (*) (void)>(*dispatcher));
357398
rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS;
358399

359-
capsule rec_capsule(rec, [](void *ptr) {
400+
capsule rec_capsule(unique_rec.release(), [](void *ptr) {
360401
destruct((detail::function_record *) ptr);
361402
});
403+
guarded_strdup.release();
362404

363405
object scope_module;
364406
if (rec->scope) {
@@ -393,13 +435,15 @@ class cpp_function : public function {
393435
chain_start = rec;
394436
rec->next = chain;
395437
auto rec_capsule = reinterpret_borrow<capsule>(((PyCFunctionObject *) m_ptr)->m_self);
396-
rec_capsule.set_pointer(rec);
438+
rec_capsule.set_pointer(unique_rec.release());
439+
guarded_strdup.release();
397440
} else {
398441
// Or end of chain (normal behavior)
399442
chain_start = chain;
400443
while (chain->next)
401444
chain = chain->next;
402-
chain->next = rec;
445+
chain->next = unique_rec.release();
446+
guarded_strdup.release();
403447
}
404448
}
405449

@@ -452,7 +496,7 @@ class cpp_function : public function {
452496
}
453497

454498
/// When a cpp_function is GCed, release any memory allocated by pybind11
455-
static void destruct(detail::function_record *rec) {
499+
static void destruct(detail::function_record *rec, bool free_strings = true) {
456500
// If on Python 3.9, check the interpreter "MICRO" (patch) version.
457501
// If this is running on 3.9.0, we have to work around a bug.
458502
#if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9
@@ -463,14 +507,20 @@ class cpp_function : public function {
463507
detail::function_record *next = rec->next;
464508
if (rec->free_data)
465509
rec->free_data(rec);
466-
std::free((char *) rec->name);
467-
std::free((char *) rec->doc);
468-
std::free((char *) rec->signature);
469-
for (auto &arg: rec->args) {
470-
std::free(const_cast<char *>(arg.name));
471-
std::free(const_cast<char *>(arg.descr));
472-
arg.value.dec_ref();
510+
// During initialization, these strings might not have been copied yet,
511+
// so they cannot be freed. Once the function has been created, they can.
512+
// Check `make_function_record` for more details.
513+
if (free_strings) {
514+
std::free((char *) rec->name);
515+
std::free((char *) rec->doc);
516+
std::free((char *) rec->signature);
517+
for (auto &arg: rec->args) {
518+
std::free(const_cast<char *>(arg.name));
519+
std::free(const_cast<char *>(arg.descr));
520+
}
473521
}
522+
for (auto &arg: rec->args)
523+
arg.value.dec_ref();
474524
if (rec->def) {
475525
std::free(const_cast<char *>(rec->def->ml_doc));
476526
// Python 3.9.0 decref's these in the wrong order; rec->def

tests/test_constants_and_functions.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -124,4 +124,19 @@ TEST_SUBMODULE(constants_and_functions, m) {
124124
m.def("f2", f2);
125125
m.def("f3", f3);
126126
m.def("f4", f4);
127+
128+
// test_function_record_leaks
129+
struct LargeCapture {
130+
// This should always be enough to trigger the alternative branch
131+
// where `sizeof(capture) > sizeof(rec->data)`
132+
uint64_t zeros[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
133+
};
134+
m.def("register_large_capture_with_invalid_arguments", [](py::module_ m) {
135+
LargeCapture capture; // VS 2015's MSVC is acting up if we create the array here
136+
m.def("should_raise", [capture](int) { return capture.zeros[9] + 33; }, py::kw_only(), py::arg());
137+
});
138+
m.def("register_with_raising_repr", [](py::module_ m, py::object default_value) {
139+
m.def("should_raise", [](int, int, py::object) { return 42; }, "some docstring",
140+
py::arg_v("x", 42), py::arg_v("y", 42, "<the answer>"), py::arg_v("z", default_value));
141+
});
127142
}

tests/test_constants_and_functions.py

+11
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,14 @@ def test_exception_specifiers():
4040
assert m.f2(53) == 55
4141
assert m.f3(86) == 89
4242
assert m.f4(140) == 144
43+
44+
45+
def test_function_record_leaks():
46+
class RaisingRepr:
47+
def __repr__(self):
48+
raise RuntimeError("Surprise!")
49+
50+
with pytest.raises(RuntimeError):
51+
m.register_large_capture_with_invalid_arguments(m)
52+
with pytest.raises(RuntimeError):
53+
m.register_with_raising_repr(m, RaisingRepr())

0 commit comments

Comments
 (0)