From 07524ca51a72968358f5537d0e8633cbe114bb28 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Wed, 31 Jul 2024 19:08:15 -0400 Subject: [PATCH] Harden `--locals` against trashed memory When attempting to produce a repr for local variables, attempt to cope better with trashed memory: - Use "" as the repr of a `PyObject*` pointing directly at invalid memory, or one whose `ob_type` points at invalid memory. - Use "" as, for instance, the repr of a list containing an invalid `ob_items` pointer. More generally, use this for any object where we can determine the type, but generating the repr according to that type's rules fails due to accessing unmapped memory. Test this with some ctypes abuse. Signed-off-by: Matt Wozniski --- news/194.bugfix.rst | 1 + src/pystack/_pystack/pytypes.cpp | 71 ++++++++++++++++------- tests/integration/test_local_variables.py | 68 ++++++++++++++++++++++ 3 files changed, 119 insertions(+), 21 deletions(-) create mode 100644 news/194.bugfix.rst diff --git a/news/194.bugfix.rst b/news/194.bugfix.rst new file mode 100644 index 00000000..431fcd05 --- /dev/null +++ b/news/194.bugfix.rst @@ -0,0 +1 @@ +Heap corruption could cause PyStack to fail to generate a stack when ``--locals`` mode was used. This has been fixed by falling back to a reasonable default when attempting to format the repr of a local variable causes a dereference of an invalid pointer. diff --git a/src/pystack/_pystack/pytypes.cpp b/src/pystack/_pystack/pytypes.cpp index 9853d526..b8b5c00a 100644 --- a/src/pystack/_pystack/pytypes.cpp +++ b/src/pystack/_pystack/pytypes.cpp @@ -503,7 +503,7 @@ std::string GenericObject::toString(ssize_t max_size) const { std::stringstream os; - os << "<" << std::hex << std::showbase << d_classname << " at " << d_addr << ">"; + os << "<" << std::hex << d_classname << " at 0x" << d_addr << ">"; std::string object_str = os.str(); return limitOutput(object_str, max_size); } @@ -521,19 +521,39 @@ NoneObject::toString([[maybe_unused]] ssize_t max_size) const Object::Object(const std::shared_ptr& manager, remote_addr_t addr) : d_addr(addr) +, d_type_addr(0) +, d_classname() +, d_flags() , d_manager(manager) { LOG(DEBUG) << std::hex << std::showbase << "Copying PyObject data from address " << addr; PyObject obj; - manager->copyObjectFromProcess(d_addr, &obj); + try { + manager->copyObjectFromProcess(d_addr, &obj); + } catch (RemoteMemCopyError& ex) { + LOG(WARNING) << std::hex << std::showbase << "Failed to read PyObject data from address " + << d_addr; + d_classname = "invalid object"; + return; + } PyTypeObject cls; LOG(DEBUG) << std::hex << std::showbase << "Copying typeobject from address " << obj.ob_type; d_type_addr = reinterpret_cast(obj.ob_type); - manager->copyMemoryFromProcess((remote_addr_t)obj.ob_type, manager->offsets().py_type.size, &cls); + try { + manager->copyMemoryFromProcess( + (remote_addr_t)obj.ob_type, + manager->offsets().py_type.size, + &cls); - d_flags = manager->getField(cls, &py_type_v::o_tp_flags); + d_flags = manager->getField(cls, &py_type_v::o_tp_flags); + } catch (RemoteMemCopyError& ex) { + LOG(WARNING) << std::hex << std::showbase << "Failed to read typeobject from address " + << obj.ob_type; + d_classname = "invalid object"; + return; + } remote_addr_t name_addr = manager->getField(cls, &py_type_v::o_tp_name); try { @@ -577,24 +597,33 @@ Object::toString(ssize_t max_size) const if (max_size <= 5) { return ELLIPSIS; } - std::stringstream os; - std::visit( - overloaded{ - [&](const auto& arg) { os << arg.toString(max_size); }, - [&](const bool arg) { os << arg; }, - [&](const long arg) { os << arg; }, - [&](const double arg) { os << arg; }, - [&](const std::string& arg) { - std::string truncated = arg; - if (static_cast(max_size) < arg.size()) { - truncated = arg.substr(0, max_size - 3) + "..."; - } - os << truncated; - }, - }, - toConcreteObject()); + try { + std::stringstream os; + std::visit( + overloaded{ + [&](const auto& arg) { os << arg.toString(max_size); }, + [&](const bool arg) { os << arg; }, + [&](const long arg) { os << arg; }, + [&](const double arg) { os << arg; }, + [&](const std::string& arg) { + std::string truncated = arg; + if (static_cast(max_size) < arg.size()) { + truncated = arg.substr(0, max_size - 3) + "..."; + } + os << truncated; + }, + }, + toConcreteObject()); + + return os.str(); + } catch (RemoteMemCopyError& ex) { + LOG(WARNING) << std::hex << std::showbase << "Failed to create a repr for object of type " + << d_classname << " at address " << d_addr; - return os.str(); + std::stringstream os; + os << std::hex << std::showbase << "<" << d_classname << " object at " << d_addr << ">"; + return os.str(); + } } long diff --git a/tests/integration/test_local_variables.py b/tests/integration/test_local_variables.py index ed528ccb..715e4917 100644 --- a/tests/integration/test_local_variables.py +++ b/tests/integration/test_local_variables.py @@ -558,3 +558,71 @@ def second_func(the_argument): second_func_frame = find_frame(frames, "second_func") assert "the_argument" not in second_func_frame.arguments + + +@ALL_PYTHONS +@ALL_SOURCES +def test_trashed_locals(generate_threads, python, tmpdir): + # GIVEN + _, python_executable = python + + program_template = """ +import ctypes +import sys +import time + +class ListObject(ctypes.Structure): + _fields_ = [ + ("ob_refcnt", ctypes.c_ssize_t), + ("ob_type", ctypes.c_void_p), + ("ob_size", ctypes.c_ssize_t), + ("ob_item", ctypes.c_void_p), + ] + +class TupleObject(ctypes.Structure): + _fields_ = [ + ("ob_refcnt", ctypes.c_ssize_t), + ("ob_type", ctypes.c_void_p), + ("ob_size", ctypes.c_ssize_t), + ("ob_item0", ctypes.c_void_p), + ("ob_item1", ctypes.c_void_p), + ] + +def main(): + bad_type = (1, 2, 3) + bad_elem = (4, 5, 6) + nullelem = (7, 8, 9) + bad_list = [0, 1, 2] + + TupleObject.from_address(id(bad_type)).ob_type = 0xded + TupleObject.from_address(id(bad_elem)).ob_item1 = 0xbad + TupleObject.from_address(id(nullelem)).ob_item1 = 0x0 + ListObject.from_address(id(bad_list)).ob_item = 0x0 + + fifo = sys.argv[1] + with open(sys.argv[1], "w") as fifo: + fifo.write("ready") + time.sleep(1000) + +main() + """ + + script = tmpdir / "the_script.py" + script.write_text(program_template, encoding="utf-8") + + # WHEN + threads = generate_threads(python_executable, script, tmpdir, locals=True) + + # THEN + + assert len(threads) == 1 + (thread,) = threads + + frames = list(thread.frames) + assert (len(frames)) == 2 + + main = find_frame(frames, "main") + assert re.match(r"^$", main.locals["bad_type"]) + assert main.locals["bad_elem"] == "(4, , 6)" + assert main.locals["nullelem"] == "(7, , 9)" + assert re.match(r"^$", main.locals["bad_list"])