Warn when multiprocessing start method is 'fork'

Andy-Jost · Andy-Jost · commit 3d3499a5e87b · 2025-12-03T13:40:05.000-08:00
CUDA does not support the fork() system call. Forked subprocesses exhibit undefined behavior, including failure to initialize CUDA contexts and devices. Add warning checks in multiprocessing reduction functions for IPC objects (DeviceMemoryResource, IPCAllocationHandle, Event) that warn when the start method is 'fork'. The warning is emitted once per process when IPC objects are serialized. Fixes #1136
diff --git a/cuda_core/cuda/core/experimental/_event.pyx b/cuda_core/cuda/core/experimental/_event.pyx
@@ -21,6 +21,7 @@ from typing import TYPE_CHECKING, Optional
 from cuda.core.experimental._context import Context
 from cuda.core.experimental._utils.cuda_utils import (
     CUDAError,
+    _check_multiprocessing_start_method,
     driver,
 )
 if TYPE_CHECKING:
@@ -300,6 +301,7 @@ cdef class IPCEventDescriptor:
 
 
 def _reduce_event(event):
+    _check_multiprocessing_start_method()  # warn (once) if the multiprocessing start method is 'fork'
     return event.from_ipc_descriptor, (event.get_ipc_descriptor(),)
 
 multiprocessing.reduction.register(Event, _reduce_event)
diff --git a/cuda_core/cuda/core/experimental/_memory/_ipc.pyx b/cuda_core/cuda/core/experimental/_memory/_ipc.pyx
@@ -10,6 +10,7 @@ from cuda.bindings cimport cydriver
 from cuda.core.experimental._memory._buffer cimport Buffer
 from cuda.core.experimental._stream cimport default_stream
 from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN
+from cuda.core.experimental._utils.cuda_utils import _check_multiprocessing_start_method
 
 import multiprocessing
 import os
@@ -129,6 +130,7 @@ cdef class IPCAllocationHandle:
 
 
 def _reduce_allocation_handle(alloc_handle):
+    _check_multiprocessing_start_method()  # warn (once) if the multiprocessing start method is 'fork'
     df = multiprocessing.reduction.DupFd(alloc_handle.handle)
     return _reconstruct_allocation_handle, (type(alloc_handle), df, alloc_handle.uuid)
 
@@ -141,6 +143,7 @@ multiprocessing.reduction.register(IPCAllocationHandle, _reduce_allocation_handl
 
 
 def _deep_reduce_device_memory_resource(mr):
+    _check_multiprocessing_start_method()
     from .._device import Device
     device = Device(mr.device_id)
     alloc_handle = mr.get_allocation_handle()
diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx
@@ -5,6 +5,9 @@
 import functools
 from functools import partial
 import importlib.metadata
+import multiprocessing
+import platform
+import warnings
 from collections import namedtuple
 from collections.abc import Sequence
 from contextlib import ExitStack
@@ -283,3 +286,39 @@ class Transaction:
         """
         # pop_all() empties this stack so no callbacks are triggered on exit.
         self._stack.pop_all()
+
+
+# Set to True after the fork warning has been issued so later reductions return early instead of warning again
+_fork_warning_emitted = False
+
+
+def _check_multiprocessing_start_method():
+    """Warn, once, if multiprocessing starts child processes with 'fork'.
+
+    Called while IPC objects are reduced; CUDA does not support fork().
+    """
+    global _fork_warning_emitted
+    import sys  # function-scope: only needed for the interpreter version check
+    if _fork_warning_emitted:
+        return
+
+    # Tail shared by both messages; it completes the clause "..., which ".
+    common_message = (
+        "CUDA does not support. Forked subprocesses exhibit undefined behavior, "
+        "including failure to initialize CUDA contexts and devices. Set the start method "
+        "to 'spawn' before creating processes that use CUDA. "
+        "Use: multiprocessing.set_start_method('spawn')"
+    )
+
+    # allow_none=True: a bare get_start_method() never raises for an unset method; it pins
+    # the platform default, after which the set_start_method('spawn') we recommend fails.
+    start_method = multiprocessing.get_start_method(allow_none=True)
+    if start_method == "fork":
+        message = f"multiprocessing start method is 'fork', which {common_message}"
+    elif start_method is None and platform.system() == "Linux" and sys.version_info < (3, 14):
+        # Unset: Linux defaults to 'fork' before Python 3.14 and to 'forkserver' from 3.14 on.
+        message = f"multiprocessing start method is not set and defaults to 'fork' on Linux, which {common_message}"
+    else:
+        return
+    warnings.warn(message, UserWarning, stacklevel=3)
+    _fork_warning_emitted = True
diff --git a/cuda_core/tests/test_multiprocessing_warning.py b/cuda_core/tests/test_multiprocessing_warning.py
@@ -0,0 +1,248 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Test that warnings are emitted when multiprocessing start method is 'fork'
+and IPC objects are serialized.
+
+These tests run in subprocesses to avoid the conftest.py session fixture that
+sets the start method to 'spawn'.
+"""
+
+import subprocess
+import sys
+import textwrap
+
+
+def test_warn_on_fork_method_device_memory_resource():
+    """Test that warning is emitted when DeviceMemoryResource is pickled with fork method."""
+    script = textwrap.dedent("""
+        import multiprocessing
+        import sys
+        import warnings
+
+        # Set start method to 'fork' before importing cuda.core
+        multiprocessing.set_start_method('fork', force=True)
+
+        # Capture warnings
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+
+            from cuda.core.experimental import Device, DeviceMemoryResource, DeviceMemoryResourceOptions
+
+            # Create a DeviceMemoryResource
+            device = Device(0)
+            device.set_current()
+            options = DeviceMemoryResourceOptions(max_size=2097152, ipc_enabled=True)
+            mr = DeviceMemoryResource(device, options=options)
+
+            # Trigger reduction by pickling with ForkingPickler (triggers multiprocessing reduction)
+            from multiprocessing.reduction import ForkingPickler
+            ForkingPickler.dumps(mr)
+
+            # Pick out the fork warning by message so unrelated warnings recorded
+            # while importing or using cuda.core cannot mask it or be mistaken for it
+            fork_warnings = [warning for warning in w if "fork" in str(warning.message).lower()]
+            if not fork_warnings:
+                print("ERROR: No warning emitted", file=sys.stderr)
+                sys.exit(1)
+
+            warning = fork_warnings[0]
+            if warning.category != UserWarning:
+                print(f"ERROR: Expected UserWarning, got {warning.category}", file=sys.stderr)
+                sys.exit(1)
+
+            # 'fork' is guaranteed by the filter above; check the rest of the advice
+            text = str(warning.message).lower()
+            if "spawn" not in text:
+                print(f"ERROR: Warning message doesn't mention 'spawn': {warning.message}", file=sys.stderr)
+                sys.exit(1)
+
+            if "undefined behavior" not in text:
+                msg = f"ERROR: Warning message doesn't mention 'undefined behavior': {warning.message}"
+                print(msg, file=sys.stderr)
+                sys.exit(1)
+
+            print("SUCCESS: Warning emitted correctly")
+    """)
+
+    result = subprocess.run([sys.executable, "-c", script], capture_output=True, text=True, timeout=30)  # noqa: S603
+
+    assert result.returncode == 0, f"Subprocess failed:\nstdout: {result.stdout}\nstderr: {result.stderr}"
+    assert "SUCCESS" in result.stdout
+
+
+def test_warn_on_fork_method_allocation_handle():
+    """Test that warning is emitted when IPCAllocationHandle is pickled with fork method."""
+    script = textwrap.dedent("""
+        import multiprocessing
+        import sys
+        import warnings
+
+        # Set start method to 'fork' before importing cuda.core
+        multiprocessing.set_start_method('fork', force=True)
+
+        # Capture warnings
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+
+            from cuda.core.experimental import Device, DeviceMemoryResource, DeviceMemoryResourceOptions
+
+            # Create a DeviceMemoryResource and get its allocation handle
+            device = Device(0)
+            device.set_current()
+            options = DeviceMemoryResourceOptions(max_size=2097152, ipc_enabled=True)
+            mr = DeviceMemoryResource(device, options=options)
+            alloc_handle = mr.get_allocation_handle()
+
+            # Trigger reduction by pickling with ForkingPickler (triggers multiprocessing reduction)
+            from multiprocessing.reduction import ForkingPickler
+            ForkingPickler.dumps(alloc_handle)
+
+            # Pick out the fork warning by message; unrelated warnings may have been recorded first
+            fork_warnings = [warning for warning in w if "fork" in str(warning.message).lower()]
+            if not fork_warnings:
+                print("ERROR: No warning emitted", file=sys.stderr)
+                sys.exit(1)
+            warning = fork_warnings[0]
+            if warning.category != UserWarning:
+                print(f"ERROR: Expected UserWarning, got {warning.category}", file=sys.stderr)
+                sys.exit(1)
+
+            print("SUCCESS: Warning emitted correctly")
+    """)
+
+    result = subprocess.run([sys.executable, "-c", script], capture_output=True, text=True, timeout=30)  # noqa: S603
+
+    assert result.returncode == 0, f"Subprocess failed:\nstdout: {result.stdout}\nstderr: {result.stderr}"
+    assert "SUCCESS" in result.stdout
+
+
+def test_warn_on_fork_method_event():
+    """Test that warning is emitted when Event is pickled with fork method."""
+    script = textwrap.dedent("""
+        import multiprocessing
+        import sys
+        import warnings
+
+        # Set start method to 'fork' before importing cuda.core
+        multiprocessing.set_start_method('fork', force=True)
+
+        # Capture warnings
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+
+            from cuda.core.experimental import Device, EventOptions
+
+            # Create an Event via stream.record()
+            device = Device(0)
+            device.set_current()
+            stream = device.create_stream()
+            ipc_event_options = EventOptions(ipc_enabled=True)
+            event = stream.record(options=ipc_event_options)
+
+            # Trigger reduction by pickling with ForkingPickler (triggers multiprocessing reduction)
+            from multiprocessing.reduction import ForkingPickler
+            ForkingPickler.dumps(event)
+
+            # Pick out the fork warning by message; unrelated warnings may have been recorded first
+            fork_warnings = [warning for warning in w if "fork" in str(warning.message).lower()]
+            if not fork_warnings:
+                print("ERROR: No warning emitted", file=sys.stderr)
+                sys.exit(1)
+            warning = fork_warnings[0]
+            if warning.category != UserWarning:
+                print(f"ERROR: Expected UserWarning, got {warning.category}", file=sys.stderr)
+                sys.exit(1)
+
+            print("SUCCESS: Warning emitted correctly")
+    """)
+
+    result = subprocess.run([sys.executable, "-c", script], capture_output=True, text=True, timeout=30)  # noqa: S603
+
+    assert result.returncode == 0, f"Subprocess failed:\nstdout: {result.stdout}\nstderr: {result.stderr}"
+    assert "SUCCESS" in result.stdout
+
+
+def test_no_warning_with_spawn_method():
+    """Pickling an IPC-enabled memory resource under 'spawn' must not produce a fork warning."""
+    child_script = textwrap.dedent("""
+        import multiprocessing
+        import sys
+        import warnings
+
+        # Set start method to 'spawn' before importing cuda.core
+        multiprocessing.set_start_method('spawn', force=True)
+
+        # Capture warnings
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+
+            from cuda.core.experimental import Device, DeviceMemoryResource, DeviceMemoryResourceOptions
+
+            # Create a DeviceMemoryResource
+            device = Device(0)
+            device.set_current()
+            options = DeviceMemoryResourceOptions(max_size=2097152, ipc_enabled=True)
+            mr = DeviceMemoryResource(device, options=options)
+
+            # Trigger reduction by pickling with ForkingPickler (triggers multiprocessing reduction)
+            from multiprocessing.reduction import ForkingPickler
+            ForkingPickler.dumps(mr)
+
+            # Check that no warning was emitted
+            fork_warnings = [warning for warning in w if "fork" in str(warning.message).lower()]
+            if len(fork_warnings) > 0:
+                print(f"ERROR: Unexpected warning emitted: {fork_warnings[0].message}", file=sys.stderr)
+                sys.exit(1)
+
+            print("SUCCESS: No warning emitted with spawn method")
+    """)
+    command = [sys.executable, "-c", child_script]
+
+    proc = subprocess.run(command, capture_output=True, text=True, timeout=30)  # noqa: S603
+    assert proc.returncode == 0, f"Subprocess failed:\nstdout: {proc.stdout}\nstderr: {proc.stderr}"
+    assert "SUCCESS" in proc.stdout
+
+
+def test_warning_emitted_only_once():
+    """Pickling several IPC objects in one 'fork' process must yield exactly one fork warning."""
+    child_script = textwrap.dedent("""
+        import multiprocessing
+        import sys
+        import warnings
+
+        # Set start method to 'fork' before importing cuda.core
+        multiprocessing.set_start_method('fork', force=True)
+
+        # Capture warnings
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+
+            from cuda.core.experimental import Device, DeviceMemoryResource, DeviceMemoryResourceOptions
+
+            # Create multiple DeviceMemoryResources
+            device = Device(0)
+            device.set_current()
+            options = DeviceMemoryResourceOptions(max_size=2097152, ipc_enabled=True)
+            mr1 = DeviceMemoryResource(device, options=options)
+            mr2 = DeviceMemoryResource(device, options=options)
+
+            # Trigger reduction by pickling multiple times with ForkingPickler
+            from multiprocessing.reduction import ForkingPickler
+            ForkingPickler.dumps(mr1)
+            ForkingPickler.dumps(mr2)
+
+            # Check that warning was emitted only once
+            fork_warnings = [warning for warning in w if "fork" in str(warning.message).lower()]
+            if len(fork_warnings) != 1:
+                print(f"ERROR: Expected 1 warning, got {len(fork_warnings)}", file=sys.stderr)
+                sys.exit(1)
+
+            print("SUCCESS: Warning emitted only once")
+    """)
+    command = [sys.executable, "-c", child_script]
+
+    proc = subprocess.run(command, capture_output=True, text=True, timeout=30)  # noqa: S603
+    assert proc.returncode == 0, f"Subprocess failed:\nstdout: {proc.stdout}\nstderr: {proc.stderr}"
+    assert "SUCCESS" in proc.stdout