Skip to content

Commit d5246fe

Browse files
felixweinbergerjingx8885surya-prakash-susarla
committed
Fix child process cleanup in stdio termination
When terminating MCP servers, child processes were being orphaned because only the parent process was killed. This caused resource leaks and prevented proper cleanup, especially with tools like npx that spawn child processes for the actual server implementation. This was happening on both POSIX and Windows systems - however because of implementation details, resolving this is non-trivial and requires introducing psutil to introduce cross-platform utilities for dealing with children and process trees. This addresses critical issues where MCP servers using process spawning tools would leave zombie processes running after client shutdown. resolves #850 resolves #729 Co-authored-by: jingx8885 <[email protected]> Co-authored-by: Surya Prakash Susarla <[email protected]>
1 parent 7af9e65 commit d5246fe

File tree

5 files changed

+538
-14
lines changed

5 files changed

+538
-14
lines changed

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ dependencies = [
3232
"pydantic-settings>=2.5.2",
3333
"uvicorn>=0.23.1; sys_platform != 'emscripten'",
3434
"jsonschema>=4.20.0",
35+
"pywin32>=310; sys_platform == 'win32'",
3536
]
3637

3738
[project.optional-dependencies]
@@ -124,5 +125,7 @@ filterwarnings = [
124125
# This should be fixed on Uvicorn's side.
125126
"ignore::DeprecationWarning:websockets",
126127
"ignore:websockets.server.WebSocketServerProtocol is deprecated:DeprecationWarning",
127-
"ignore:Returning str or bytes.*:DeprecationWarning:mcp.server.lowlevel"
128+
"ignore:Returning str or bytes.*:DeprecationWarning:mcp.server.lowlevel",
129+
# pywin32 internal deprecation warning - will be fixed in future pywin32 release
130+
"ignore:getargs.*The 'u' format is deprecated:DeprecationWarning"
128131
]

src/mcp/client/stdio/__init__.py

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import os
2+
import signal
23
import sys
34
from contextlib import asynccontextmanager
45
from pathlib import Path
56
from typing import Literal, TextIO
67

78
import anyio
89
import anyio.lowlevel
10+
from anyio.abc import Process
911
from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
1012
from anyio.streams.text import TextReceiveStream
1113
from pydantic import BaseModel, Field
@@ -14,8 +16,10 @@
1416
from mcp.shared.message import SessionMessage
1517

1618
from .win32 import (
19+
FallbackProcess,
1720
create_windows_process,
1821
get_windows_executable_command,
22+
terminate_windows_process_tree,
1923
)
2024

2125
# Environment variables to inherit by default
@@ -184,7 +188,7 @@ async def stdin_writer():
184188
await process.wait()
185189
except TimeoutError:
186190
# If process doesn't terminate in time, force kill it
187-
process.kill()
191+
await _terminate_process_with_children(process)
188192
except ProcessLookupError:
189193
# Process already exited, which is fine
190194
pass
@@ -219,11 +223,80 @@ async def _create_platform_compatible_process(
219223
):
220224
"""
221225
Creates a subprocess in a platform-compatible way.
222-
Returns a process handle.
226+
227+
Unix: Creates process in a new session/process group for killpg support
228+
Windows: Creates process in a Job Object for reliable child termination
223229
"""
224230
if sys.platform == "win32":
231+
# Windows: Use Job Objects for proper process tree management
225232
process = await create_windows_process(command, args, env, errlog, cwd)
226233
else:
227-
process = await anyio.open_process([command, *args], env=env, stderr=errlog, cwd=cwd)
234+
# Unix: Create process in new session for process group termination
235+
process = await anyio.open_process(
236+
[command, *args],
237+
env=env,
238+
stderr=errlog,
239+
cwd=cwd,
240+
start_new_session=True,
241+
)
228242

229243
return process
244+
245+
246+
async def _terminate_process_with_children(process: Process | FallbackProcess, timeout: float = 2.0) -> None:
247+
"""
248+
Terminate a process and all its children using platform-specific methods.
249+
250+
Unix: Uses os.killpg() for atomic process group termination
251+
Windows: Uses Job Objects via pywin32 for reliable child process cleanup
252+
"""
253+
if sys.platform == "win32":
254+
# Windows: Use Job Object termination
255+
await terminate_windows_process_tree(process)
256+
else:
257+
# Unix: Use process groups for atomic termination
258+
pid = getattr(process, "pid", None)
259+
if pid is None:
260+
popen = getattr(process, "popen", None)
261+
if popen:
262+
pid = getattr(popen, "pid", None)
263+
264+
if not pid:
265+
return
266+
267+
try:
268+
# Get process group ID (we use start_new_session=True)
269+
pgid = os.getpgid(pid)
270+
271+
# Send SIGTERM to entire process group atomically
272+
os.killpg(pgid, signal.SIGTERM)
273+
274+
# Wait for graceful termination
275+
deadline = anyio.current_time() + timeout
276+
while anyio.current_time() < deadline:
277+
try:
278+
# Check if process group still exists (signal 0 = check only)
279+
os.killpg(pgid, 0)
280+
await anyio.sleep(0.1)
281+
except ProcessLookupError:
282+
# Process group terminated successfully
283+
return
284+
285+
# Force kill if still alive after timeout
286+
try:
287+
os.killpg(pgid, signal.SIGKILL)
288+
except ProcessLookupError:
289+
# Already dead
290+
pass
291+
292+
except (ProcessLookupError, PermissionError, OSError):
293+
# Fall back to simple terminate if process group approach fails
294+
try:
295+
process.terminate()
296+
with anyio.fail_after(timeout):
297+
await process.wait()
298+
except Exception:
299+
try:
300+
process.kill()
301+
except Exception:
302+
pass

src/mcp/client/stdio/win32.py

Lines changed: 141 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,28 @@
66
import subprocess
77
import sys
88
from pathlib import Path
9-
from typing import BinaryIO, TextIO, cast
9+
from typing import BinaryIO, TextIO, Union, cast
1010

1111
import anyio
1212
from anyio import to_thread
1313
from anyio.abc import Process
1414
from anyio.streams.file import FileReadStream, FileWriteStream
1515

16+
# Windows-specific imports for Job Objects
17+
if sys.platform == "win32":
18+
import pywintypes
19+
import win32api
20+
import win32con
21+
import win32job
22+
else:
23+
# Type stubs for non-Windows platforms
24+
win32api = None
25+
win32con = None
26+
win32job = None
27+
pywintypes = None
28+
29+
JobHandle = int
30+
1631

1732
def get_windows_executable_command(command: str) -> str:
1833
"""
@@ -103,6 +118,11 @@ def kill(self) -> None:
103118
"""Kill the subprocess immediately (alias for terminate)."""
104119
self.terminate()
105120

121+
@property
122+
def pid(self) -> int:
123+
"""Return the process ID."""
124+
return self.popen.pid
125+
106126

107127
# ------------------------
108128
# Updated function
@@ -117,13 +137,16 @@ async def create_windows_process(
117137
cwd: Path | str | None = None,
118138
) -> Process | FallbackProcess:
119139
"""
120-
Creates a subprocess in a Windows-compatible way.
140+
Creates a subprocess in a Windows-compatible way with Job Object support.
121141
122142
Attempt to use anyio's open_process for async subprocess creation.
123143
In some cases this will throw NotImplementedError on Windows, e.g.
124144
when using the SelectorEventLoop which does not support async subprocesses.
125145
In that case, we fall back to using subprocess.Popen.
126146
147+
The process is automatically added to a Job Object to ensure all child
148+
processes are terminated when the parent is terminated.
149+
127150
Args:
128151
command (str): The executable to run
129152
args (list[str]): List of command line arguments
@@ -132,8 +155,12 @@ async def create_windows_process(
132155
cwd (Path | str | None): Working directory for the subprocess
133156
134157
Returns:
135-
FallbackProcess: Async-compatible subprocess with stdin and stdout streams
158+
Process | FallbackProcess: Async-compatible subprocess with stdin and stdout streams
136159
"""
160+
# Create a job object for this process
161+
job = _create_job_object()
162+
process = None
163+
137164
try:
138165
# First try using anyio with Windows-specific flags to hide console window
139166
process = await anyio.open_process(
@@ -146,10 +173,9 @@ async def create_windows_process(
146173
stderr=errlog,
147174
cwd=cwd,
148175
)
149-
return process
150176
except NotImplementedError:
151177
# Windows often doesn't support async subprocess creation, use fallback
152-
return await _create_windows_fallback_process(command, args, env, errlog, cwd)
178+
process = await _create_windows_fallback_process(command, args, env, errlog, cwd)
153179
except Exception:
154180
# Try again without creation flags
155181
process = await anyio.open_process(
@@ -158,7 +184,9 @@ async def create_windows_process(
158184
stderr=errlog,
159185
cwd=cwd,
160186
)
161-
return process
187+
188+
_maybe_assign_process_to_job(process, job)
189+
return process
162190

163191

164192
async def _create_windows_fallback_process(
@@ -171,7 +199,8 @@ async def _create_windows_fallback_process(
171199
"""
172200
Create a subprocess using subprocess.Popen as a fallback when anyio fails.
173201
174-
This function wraps the sync subprocess.Popen in an async-compatible interface.
202+
This function wraps the sync subprocess.Popen in an async-compatible interface
203+
and optionally assigns it to a job object.
175204
"""
176205
try:
177206
# Try launching with creationflags to avoid opening a new console window
@@ -185,8 +214,6 @@ async def _create_windows_fallback_process(
185214
bufsize=0, # Unbuffered output
186215
creationflags=getattr(subprocess, "CREATE_NO_WINDOW", 0),
187216
)
188-
return FallbackProcess(popen_obj)
189-
190217
except Exception:
191218
# If creationflags failed, fallback without them
192219
popen_obj = subprocess.Popen(
@@ -198,4 +225,108 @@ async def _create_windows_fallback_process(
198225
cwd=cwd,
199226
bufsize=0,
200227
)
201-
return FallbackProcess(popen_obj)
228+
process = FallbackProcess(popen_obj)
229+
return process
230+
231+
232+
def _create_job_object() -> int | None:
233+
"""
234+
Create a Windows Job Object configured to terminate all processes when closed.
235+
236+
Returns:
237+
The job object handle, or None if creation failed.
238+
"""
239+
if sys.platform != "win32" or not win32job:
240+
return None
241+
242+
try:
243+
# Create an unnamed job object
244+
job = win32job.CreateJobObject(None, "")
245+
246+
# Query current job information
247+
extended_info = win32job.QueryInformationJobObject(job, win32job.JobObjectExtendedLimitInformation)
248+
249+
# Set the job to terminate all processes when the handle is closed
250+
extended_info["BasicLimitInformation"]["LimitFlags"] |= win32job.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE
251+
252+
# Apply the updated job information
253+
win32job.SetInformationJobObject(job, win32job.JobObjectExtendedLimitInformation, extended_info)
254+
255+
return job
256+
except Exception:
257+
# If job creation fails, return None
258+
return None
259+
260+
261+
def _maybe_assign_process_to_job(process: Union[Process, "FallbackProcess"], job: JobHandle | None) -> None:
262+
"""
263+
Try to assign a process to a job object. If assignment fails
264+
for any reason, the job handle is closed.
265+
266+
Args:
267+
process: The process to assign to the job
268+
job: The job object handle (may be None)
269+
"""
270+
if not job:
271+
return
272+
273+
if sys.platform != "win32" or not win32api or not win32con or not win32job:
274+
return
275+
276+
try:
277+
# Open the process with required permissions
278+
process_handle = win32api.OpenProcess(
279+
win32con.PROCESS_SET_QUOTA | win32con.PROCESS_TERMINATE, False, process.pid
280+
)
281+
if not process_handle:
282+
raise Exception("Failed to open process handle")
283+
284+
try:
285+
# Assign process to job
286+
win32job.AssignProcessToJobObject(job, process_handle)
287+
# Store job on process for later cleanup
288+
process._job_object = job # type: ignore
289+
finally:
290+
# Always close the process handle
291+
win32api.CloseHandle(process_handle)
292+
except Exception:
293+
# If we can't assign to job, close it
294+
if win32api:
295+
win32api.CloseHandle(job)
296+
297+
298+
async def terminate_windows_process_tree(process: Union[Process, "FallbackProcess"]) -> None:
299+
"""
300+
Terminate a process and all its children on Windows.
301+
302+
If the process has an associated job object, it will be terminated.
303+
Otherwise, falls back to basic process termination.
304+
305+
Args:
306+
process: The process to terminate
307+
"""
308+
if sys.platform != "win32":
309+
return
310+
311+
# Check if process has a job object
312+
job = getattr(process, "_job_object", None)
313+
if job and win32job:
314+
try:
315+
# Terminate all processes in the job (exit code 1)
316+
win32job.TerminateJobObject(job, 1)
317+
except Exception:
318+
# Job might already be terminated
319+
pass
320+
finally:
321+
if win32api:
322+
try:
323+
# Close the job handle
324+
win32api.CloseHandle(job)
325+
except Exception:
326+
pass
327+
328+
# Always try to terminate the process itself as well
329+
try:
330+
process.terminate()
331+
except Exception:
332+
pass

0 commit comments

Comments
 (0)