Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Optional

from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError
from cuda.pathfinder._dynamic_libs.load_dl_linux import get_candidate_sonames, _load_lib, abs_path_for_dynamic_library
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
SITE_PACKAGES_LIBDIRS_LINUX,
SITE_PACKAGES_LIBDIRS_WINDOWS,
Expand All @@ -16,6 +17,10 @@
from cuda.pathfinder._utils.find_sub_dirs import find_sub_dirs_all_sitepackages
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS

if IS_WINDOWS:
from cuda.pathfinder._dynamic_libs.load_dl_windows import find_with_system_search_windows as find_with_system_search
else:
from cuda.pathfinder._dynamic_libs.load_dl_linux import find_with_system_search_linux as find_with_system_search

def _no_such_file_in_sub_dirs(
sub_dirs: Sequence[str], file_wild: str, error_messages: list[str], attachments: list[str]
Expand Down Expand Up @@ -163,6 +168,7 @@ def __init__(self, libname: str):
self.error_messages: list[str] = []
self.attachments: list[str] = []
self.abs_path: Optional[str] = None
self.distribution: Optional[str] = None
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is not used anymore?


def try_site_packages(self) -> Optional[str]:
if IS_WINDOWS:
Expand All @@ -186,6 +192,10 @@ def try_with_conda_prefix(self) -> Optional[str]:
def try_with_cuda_home(self) -> Optional[str]:
return self._find_using_lib_dir(_find_lib_dir_using_cuda_home(self.libname))

def try_with_system_search(self) -> Optional[str]:
return find_with_system_search(self.libname)


def _find_using_lib_dir(self, lib_dir: Optional[str]) -> Optional[str]:
if lib_dir is None:
return None
Expand All @@ -203,6 +213,8 @@ def _find_using_lib_dir(self, lib_dir: Optional[str]) -> Optional[str]:
self.error_messages,
self.attachments,
)
if self.abs_path is not None:
self.distribution = "CUDA_HOME"

def raise_not_found_error(self) -> None:
err = ", ".join(self.error_messages)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,17 @@
class DynamicLibNotFoundError(RuntimeError):
pass

@dataclass
class Distribution:
    """Describe where a dynamic library was obtained from.

    Attributes:
        name: Label for the origin of the library (for example "system",
            "site-packages", "conda" or "CUDA_HOME").
        version: Version associated with that origin, or None when no
            version information is available.
    """

    name: str
    # Optional with a None default: callers construct
    # Distribution("system", None) when there is no version to report.
    version: Optional[str] = None


@dataclass
class LoadedDL:
    """Describe a dynamic library handle together with how it was obtained."""

    abs_path: Optional[str]
    was_already_loaded_from_elsewhere: bool
    _handle_uint: int  # Platform-agnostic unsigned pointer value
    # Origin of the library; stays None when no origin was recorded.
    distribution: Optional[Distribution] = None

def load_dependencies(libname: str, load_func: Callable[[str], LoadedDL]) -> None:
for dep in DIRECT_DEPENDENCIES.get(libname, ()):
Expand Down
28 changes: 15 additions & 13 deletions cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import os
from typing import Optional, cast

from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL, Distribution
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
LIBNAMES_REQUIRING_RTLD_DEEPBIND,
SUPPORTED_LINUX_SONAMES,
Expand Down Expand Up @@ -170,9 +170,21 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]:
abs_path = abs_path_for_dynamic_library(libname, handle)
if abs_path is None:
raise RuntimeError(f"No expected symbol for {libname=!r}")
return LoadedDL(abs_path, False, handle._handle)
return LoadedDL(abs_path, False, handle._handle, Distribution("system", None))
return None

def find_with_system_search_linux(libname: str) -> Optional[str]:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two things stand out here:

  • This function is an almost exact copy of load_with_system_search() and could be implemented as return load_with_system_search(libname).abs_path

  • But I wouldn't want to give this function a name that starts with "find", because it actually loads the library, which is a side effect with very strong consequences. — See also #757 ("Add find_nvidia_dynamic_library to find the DSO location without loading").

To keep the code reflecting the actual (inconvenient) realities, it'd be great to avoid adding the find_with_system_search_* functions and try_with_system_search() in find_nvidia_dynamic_lib.py, even if that means extra wrinkles elsewhere.

Copy link
Contributor Author

@brandon-b-miller brandon-b-miller Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we switch to a version of find_with_system_search that doesn't load, but does some kind of existence check on the file? That would allow us to have a really clean separation between "finding" and "loading"

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately not, beyond the subprocess idea that's tracked already under #757.

To double-check, I just asked ChatGPT 5 Pro (which didn't exist last time I was working on it):

This was my prompt:


I believe I asked before, but could you please give this another very thorough look?

Platform: Linux only (please ignore any other platforms)

Python versions: 3.13 (please ignore any older Python versions)

Consider this example code:

import ctypes
import os


soname = "libcublas.so.13"
cdll_mode = os.RTLD_NOW | os.RTLD_GLOBAL
try:
    ctypes.CDLL(soname, cdll_mode)
except OSError:
    print(f"Failure loading {soname}")
else:
    print(f"Success loading {soname}")

On my workstation it prints Success loading libcublas.so.13

My question: Is there any way to implement

  • find the dynamic library (libcublas.so.13 in the example) in exactly the same way,

  • but do not actually load it,

  • WITHOUT reverse engineering the search for the file to load?


It reasoned for 15 minutes!

For the full answer, please see https://chatgpt.com/share/68dc09d0-c928-8008-9a55-a5aede0dbbf8

Bottom line

  • In‑process, no‑load, exact resolution: there isn’t a public API.

  • Practical solution that’s exact but safe: spawn a one‑shot helper ...

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To track some pointers here:

When I asked ChatGPT about a brute-force idea, it surprisingly suggested a new approach:

The interesting part from there:


A practical, non‑subprocess alternative (doesn’t load the target)

If you’re willing to slightly relax “no reverse‑engineering,” there is a clean way that doesn’t load the library and doesn’t guess the search order:

Ask the dynamic linker for the exact directory search list it would use for a dlopen() from your process, then walk that list in order to find the first directory containing libcublas.so.13.

On glibc, this is what dlinfo(..., RTLD_DI_SERINFO, ...) is for: it returns the actual run‑time search path the linker will follow (including LD_LIBRARY_PATH, RUNPATH/RPATH, $ORIGIN expansions, and system directories configured via ld.so.conf). You then check those directories in order for the file name. That way, you’re not re‑implementing the rules—you’re reading the linker’s own computed search list—and you never map the target library.


I then asked it to generate the code, which it did in a couple minutes:

But running that code, it turns out:

RTLD_DI_SERINFO returns the directory walk order the dynamic linker would use when it actually needs to scan directories (e.g., from LD_LIBRARY_PATH or a DT_RPATH/DT_RUNPATH on the handle you pass). It does not dump everything from /etc/ld.so.conf.d/*. For most system/extra dirs (like your CUDA entry), glibc doesn’t “search” them at runtime; it consults the binary cache /etc/ld.so.cache to map the SONAME → full path directly. Because there’s no directory walk involved, those directories don’t appear in the SERINFO list.

The only non-reverse-engineering way to get at the binary cache is to run ldconfig -p. So we need a subprocess and string parsing — not great IMO.

I then asked it about the equivalent on Windows, for which it also quickly generated code, but there are also caveats.

I decided to stop here:

  • Implementing find_nvidia_dynamic_lib() that's mostly equivalent to load_nvidia_dynamic_lib() seems feasible now, but it's far from straightforward.

  • I'm really not sure if the value of having that independent find function justifies dealing with the limitations and extra complications.

for soname in get_candidate_sonames(libname):
try:
handle = _load_lib(libname, soname)
except OSError:
pass
else:
abs_path = abs_path_for_dynamic_library(libname, handle)
if abs_path is None:
raise RuntimeError(f"No expected symbol for {libname=!r}")
return abs_path
return None

def _work_around_known_bugs(libname: str, found_path: str) -> None:
if libname == "nvrtc":
Expand All @@ -194,21 +206,11 @@ def _work_around_known_bugs(libname: str, found_path: str) -> None:


def load_with_abs_path(libname: str, found_path: str) -> LoadedDL:
    """Open the library file at ``found_path`` and wrap its handle in a LoadedDL.

    Args:
        libname: The name of the library to load.
        found_path: The absolute path to the library file.

    Returns:
        A LoadedDL for ``found_path`` that is marked as not having been
        loaded from elsewhere.

    Raises:
        RuntimeError: If opening the library raises OSError.
    """
    # Known-bug workarounds are applied before the library itself is opened.
    _work_around_known_bugs(libname, found_path)
    try:
        lib = _load_lib(libname, found_path)
    except OSError as exc:
        raise RuntimeError(f"Failed to dlopen {found_path}: {exc}") from exc
    else:
        return LoadedDL(
            abs_path=found_path,
            was_already_loaded_from_elsewhere=False,
            _handle_uint=lib._handle,
        )

18 changes: 18 additions & 0 deletions cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,24 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]:

return None

def find_with_system_search_windows(libname: str) -> Optional[str]:
    """Return the path of the first known DLL for ``libname`` that has a module handle.

    Args:
        libname: The name of the library to look up.

    Returns:
        The result of ``abs_path_for_dynamic_library`` for the first candidate
        DLL name for which ``GetModuleHandleW`` yields a non-zero handle, or
        None when no candidate does.
    """
    # NOTE(review): GetModuleHandleW only reports modules that are already
    # loaded into the calling process, so this does not walk the system
    # search path by itself -- confirm that this is the intended behavior.
    candidates = SUPPORTED_WINDOWS_DLLS.get(libname, ())
    # The tabulated names are walked back to front to get a new -> old order.
    for candidate in reversed(candidates):
        module_handle = kernel32.GetModuleHandleW(candidate)
        if not module_handle:
            continue
        return abs_path_for_dynamic_library(libname, module_handle)
    return None


def load_with_abs_path(libname: str, found_path: str) -> LoadedDL:
"""Load a dynamic library from the given path.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,51 +6,27 @@
import sys

from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import _FindNvidiaDynamicLib
from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL, load_dependencies
from cuda.pathfinder._dynamic_libs.load_dl_common import Distribution, LoadedDL, load_dependencies
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS

if IS_WINDOWS:
from cuda.pathfinder._dynamic_libs.load_dl_windows import (
check_if_already_loaded_from_elsewhere,
load_with_abs_path,
load_with_system_search,
)
else:
from cuda.pathfinder._dynamic_libs.load_dl_linux import (
check_if_already_loaded_from_elsewhere,
load_with_abs_path,
load_with_system_search,
)

from typing import Callable, Optional

def _load_lib_no_cache(libname: str) -> LoadedDL:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect it'll be simpler (less code) to wrap this function in a helper that adds found_via into LoadedDL.

def _load_lib_no_cache_impl(libname: str) -> LoadedDL, str
    existing code, but we also keep track of `found_via` and return (loaded, found_via)
def _load_lib_no_cache(libname: str) -> LoadedDL:
    loaded, found_via = _load_lib_no_cache_impl(libname)
    loaded.found_via = FoundVia(found_via)
    return loaded

Alternatively, we could pass found_via down along with abs_path, so that the LoadedDL instances can be constructed with it, rather than retrofitting it in later. That's a bit more surgical, not sure how it'll shake out, but it would seem cleanest.

finder = _FindNvidiaDynamicLib(libname)
abs_path = finder.try_site_packages()
if abs_path is None:
abs_path = finder.try_with_conda_prefix()

# If the library was already loaded by someone else, reproduce any OS-specific
# side-effects we would have applied on a direct absolute-path load (e.g.,
# AddDllDirectory on Windows for libs that require it).
loaded = check_if_already_loaded_from_elsewhere(libname, abs_path is not None)

# Load dependencies regardless of who loaded the primary lib first.
# Doing this *after* the side-effect ensures dependencies resolve consistently
# relative to the actually loaded location.
load_dependencies(libname, load_nvidia_dynamic_lib)

if loaded is not None:
return loaded

if abs_path is None:
loaded = load_with_system_search(libname)
if loaded is not None:
return loaded
abs_path = finder.try_with_cuda_home()
if abs_path is None:
finder.raise_not_found_error()

return load_with_abs_path(libname, abs_path)
def _load_lib_no_cache(libname: str) -> LoadedDL:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be nice to move this below the class _LoadNvidiaDynamicLib code (it's more in line with how I organized the rest of the pathfinder code).

Also, this could be a one-liner:

    return _LoadNvidiaDynamicLib(libname).load_lib()

loader = _LoadNvidiaDynamicLib(libname)
dl = loader.load_lib()
return dl


@functools.cache
Expand Down Expand Up @@ -130,3 +106,75 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
f" {sys.version_info.major}.{sys.version_info.minor}"
)
return _load_lib_no_cache(libname)


class _LoadNvidiaDynamicLib:
def __init__(self, libname: str):
self.finder = _FindNvidiaDynamicLib(libname)
self.libname = self.finder.libname

def _load_with_dependencies(
    self, get_path_func: Callable[[], Optional[str]], dist_name: str
) -> Optional[LoadedDL]:
    """Load the dependencies of ``self.libname``, then the library itself.

    Args:
        get_path_func: Zero-argument callable returning the absolute path of
            the library, or None when the library was not found.
        dist_name: Name stored in the ``Distribution`` attached to the result.

    Returns:
        The LoadedDL with ``distribution`` set, or None when
        ``get_path_func`` returns None.
    """
    # If the library was already loaded by someone else, reproduce any OS-specific
    # side-effects we would have applied on a direct absolute-path load (e.g.,
    # AddDllDirectory on Windows for libs that require it).
    loaded = check_if_already_loaded_from_elsewhere(self.libname, True)

    # Load dependencies regardless of who loaded the primary lib first.
    # Doing this *after* the side-effect ensures dependencies resolve consistently
    # relative to the actually loaded location.
    # Note that this runs before the path lookup below, so dependencies are
    # loaded even when get_path_func() ends up returning None.
    load_dependencies(self.libname, load_nvidia_dynamic_lib)

    abs_path = get_path_func()
    if abs_path is None:
        return None

    dist = Distribution(name=dist_name, version="unknown")
    if loaded is not None:
        # Reuse the handle that was already present; only tag its origin.
        loaded.distribution = dist
        return loaded

    dl = load_with_abs_path(self.libname, abs_path)
    dl.distribution = dist
    return dl

def _load_simple(self, get_path_func: Callable[[], Optional[str]], dist_name: str) -> Optional[LoadedDL]:
    """Load the library from the path that ``get_path_func`` yields.

    Args:
        get_path_func: Zero-argument callable returning the absolute path of
            the library, or None when the library was not found.
        dist_name: Name stored in the ``Distribution`` attached to the result.

    Returns:
        The LoadedDL with ``distribution`` set, or None when
        ``get_path_func`` returns None.
    """
    abs_path = get_path_func()
    if abs_path is None:
        return None

    dl = load_with_abs_path(self.libname, abs_path)
    dl.distribution = Distribution(name=dist_name, version="unknown")
    return dl

def load_with_site_packages(self) -> Optional[LoadedDL]:
return self._load_simple(self.finder.try_site_packages, "site-packages")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this changes the behavior in a subtle but deep way: loading of dependencies should happen in all cases. The interactions are tricky, but that's just a consequence of the realities. The existing order

  1. find specific lib
  2. load_dependencies unconditionally (no matter what was found!)
  3. load specific lib

is very intentional. See the existing comments in existing lines 39-39.

I guess we could change the order to

  1. load_dependencies unconditionally
  2. find specific lib
  3. load specific lib

but I didn't do that because there could be exceptions in the "find specific lib" step, and we'd have the side-effects from loading the dependencies already. It's not clear-cut if that's better or worse; I decided side-effects later is probably better.

I'm thinking it'd be better to be more conservative with the code organization when adding in the Distribution code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, let's unpack the four ways of loading things back out into their own methods then. Updates to follow.


def load_with_conda_prefix(self) -> Optional[LoadedDL]:
return self._load_simple(self.finder.try_with_conda_prefix, "conda")

def load_with_system_search(self) -> Optional[LoadedDL]:
return self._load_with_dependencies(self.finder.try_with_system_search, "system")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, if we say "system" here, it's not actually so much about "Distribution" but our chosen approach to searching.

Two ideas to explain what I have in mind, for discussion:

  • Instead of Distribution we could call the new member FoundVia. Technically that's more to the point.

  • Or we could analyze what we found and say aha, that's in a CTK package installed from a runfile, or apt, or ... distribution.

The aha part is way more tricky. Is there any value to it, for the purposes of numba-cuda?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's enough for numba-cuda to report the source and the path. Right now we also report the name of the package the library was found in (nvcc in this case), but if the path is reported I think it's less critical to have that as well.

Finding nvvm from NVIDIA NVCC Wheel                                                                                                                                                                                                                                                                                        
        Located at /raid/mambaforge/envs/numba-cuda/lib/python3.12/site-packages/nvidia/cuda_nvcc/nvvm/lib64/libnvvm.so  


def load_with_cuda_home(self) -> Optional[LoadedDL]:
return self._load_with_dependencies(self.finder.try_with_cuda_home, "CUDA_HOME")

def load_lib(self) -> LoadedDL:
dl = self.load_with_site_packages()
if dl is not None:
return dl

dl = self.load_with_conda_prefix()
if dl is not None:
return dl

dl = self.load_with_system_search()
if dl is not None:
return dl

dl = self.load_with_cuda_home()
if dl is not None:
return dl
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given the unifying group of four methods, you could use a loop like this:

        if loaded := self._maybe_return_loaded(self.finder.abs_path is not None):
            return loaded
        for load_meth, found_via in (
            (self.load_with_site_packages, "site-packages"),
            (self.load_with_conda_prefix, "conda"),
            (self.load_with_system_search, "system-search"),
            (self.load_with_cuda_home, "CUDA_HOME"),
        ):
            if loaded := load_meth():
                loaded.foundvia = FoundVia(found_via)
                return loaded


self.finder.raise_not_found_error()