Skip to content

Commit 9d526db

Browse files
authored
Fix importing packages with missing __init__.py (#732)
1 parent 835c406 commit 9d526db

File tree

2 files changed

+193
-8
lines changed

2 files changed

+193
-8
lines changed

src/_pytask/path.py

Lines changed: 101 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import contextlib
66
import functools
7+
import importlib.machinery
78
import importlib.util
89
import itertools
910
import os
@@ -200,19 +201,31 @@ def _resolve_pkg_root_and_module_name(path: Path) -> tuple[Path, str]:
200201
201202
Passing the full path to `models.py` will yield Path("src") and "app.core.models".
202203
204+
This function also handles namespace packages (directories without __init__.py)
205+
by walking up the directory tree and checking if Python's import system can
206+
resolve the computed module name to the given path. This prevents double-imports
207+
when task files import each other using Python's standard import mechanism.
208+
203209
Raises CouldNotResolvePathError if the given path does not belong to a package
204-
(missing any __init__.py files).
210+
(missing any __init__.py files) and no valid namespace package root is found.
205211
206212
"""
213+
# First, try to find a regular package (with __init__.py files).
207214
pkg_path = _resolve_package_path(path)
208215
if pkg_path is not None:
209216
pkg_root = pkg_path.parent
210-
211-
names = list(path.with_suffix("").relative_to(pkg_root).parts)
212-
if names[-1] == "__init__":
213-
names.pop()
214-
module_name = ".".join(names)
215-
return pkg_root, module_name
217+
module_name = _compute_module_name(pkg_root, path)
218+
if module_name:
219+
return pkg_root, module_name
220+
221+
# No regular package found. Check for namespace packages by walking up the
222+
# directory tree and verifying that Python's import system would resolve
223+
# the computed module name to this file.
224+
for candidate in (path.parent, *path.parent.parents):
225+
module_name = _compute_module_name(candidate, path)
226+
if module_name and _is_importable(module_name, path):
227+
# Found a root where Python's import system agrees with our module name.
228+
return candidate, module_name
216229

217230
msg = f"Could not resolve for {path}"
218231
raise CouldNotResolvePathError(msg)
@@ -222,6 +235,81 @@ class CouldNotResolvePathError(Exception):
222235
"""Custom exception raised by _resolve_pkg_root_and_module_name."""
223236

224237

238+
def _spec_matches_module_path(
239+
module_spec: importlib.machinery.ModuleSpec | None, module_path: Path
240+
) -> bool:
241+
"""Return true if the given ModuleSpec can be used to import the given module path.
242+
243+
Handles both regular modules (via origin) and namespace packages
244+
(via submodule_search_locations).
245+
246+
"""
247+
if module_spec is None:
248+
return False
249+
250+
if module_spec.origin:
251+
return Path(module_spec.origin) == module_path
252+
253+
# For namespace packages, check submodule_search_locations.
254+
# https://docs.python.org/3/library/importlib.html#importlib.machinery.ModuleSpec.submodule_search_locations
255+
if module_spec.submodule_search_locations:
256+
for location in module_spec.submodule_search_locations:
257+
if Path(location) == module_path:
258+
return True
259+
260+
return False
261+
262+
263+
def _is_importable(module_name: str, module_path: Path) -> bool:
264+
"""Check if a module name would resolve to the given path using Python's import.
265+
266+
This verifies that importing `module_name` via Python's standard import mechanism
267+
(as if typed in the REPL) would load the file at `module_path`.
268+
269+
Note: find_spec() has a side effect of creating parent namespace packages in
270+
sys.modules. We clean these up to avoid polluting the module namespace.
271+
"""
272+
# Track modules before the call to clean up side effects
273+
modules_before = set(sys.modules.keys())
274+
275+
try:
276+
spec = importlib.util.find_spec(module_name)
277+
except (ImportError, ValueError, ImportWarning):
278+
return False
279+
finally:
280+
# Clean up any modules that were added as side effects.
281+
# find_spec() can create parent namespace packages in sys.modules.
282+
modules_added = set(sys.modules.keys()) - modules_before
283+
for mod_name in modules_added:
284+
sys.modules.pop(mod_name, None)
285+
286+
return _spec_matches_module_path(spec, module_path)
287+
288+
289+
def _compute_module_name(root: Path, module_path: Path) -> str | None:
290+
"""Compute a module name based on a path and a root anchor.
291+
292+
Returns None if the module name cannot be computed.
293+
294+
"""
295+
try:
296+
path_without_suffix = module_path.with_suffix("")
297+
except ValueError:
298+
return None
299+
300+
try:
301+
relative = path_without_suffix.relative_to(root)
302+
except ValueError:
303+
return None
304+
305+
names = list(relative.parts)
306+
if not names:
307+
return None
308+
if names[-1] == "__init__":
309+
names.pop()
310+
return ".".join(names) if names else None
311+
312+
225313
def _import_module_using_spec(
226314
module_name: str, module_path: Path, module_location: Path
227315
) -> ModuleType | None:
@@ -234,7 +322,11 @@ def _import_module_using_spec(
234322
# Checking with sys.meta_path first in case one of its hooks can import this module,
235323
# such as our own assertion-rewrite hook.
236324
for meta_importer in sys.meta_path:
237-
spec = meta_importer.find_spec(module_name, [str(module_location)])
325+
try:
326+
spec = meta_importer.find_spec(module_name, [str(module_location)])
327+
except (ImportError, KeyError, ValueError):
328+
# Some meta_path finders raise exceptions when parent modules don't exist.
329+
continue
238330
if spec is not None:
239331
break
240332
else:
@@ -243,6 +335,7 @@ def _import_module_using_spec(
243335
mod = importlib.util.module_from_spec(spec)
244336
sys.modules[module_name] = mod
245337
spec.loader.exec_module(mod) # type: ignore[union-attr]
338+
_insert_missing_modules(sys.modules, module_name)
246339
return mod
247340

248341
return None

tests/test_path.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import annotations
22

3+
import importlib
4+
import importlib.util
35
import sys
46
import textwrap
57
from contextlib import ExitStack as does_not_raise # noqa: N813
@@ -389,3 +391,93 @@ def __init__(self) -> None:
389391
# Ensure we do not import the same module again (#11475).
390392
mod2 = import_path(init, root=tmp_path)
391393
assert mod is mod2
394+
395+
def test_import_path_namespace_package_consistent_with_python_import(
396+
self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
397+
) -> None:
398+
"""
399+
Ensure import_path uses module names consistent with Python's import
400+
system for namespace packages (directories without __init__.py).
401+
402+
This prevents double-imports when task files import each other.
403+
See: https://github.com/pytask-dev/pytask/issues/XXX
404+
405+
Structure:
406+
src/
407+
myproject/
408+
__init__.py <- regular package
409+
tasks/ <- NO __init__.py (namespace package)
410+
task_a.py
411+
412+
With PYTHONPATH=src, Python imports task_a.py as "myproject.tasks.task_a".
413+
pytask's import_path() should use the same name, not
414+
"src.myproject.tasks.task_a".
415+
"""
416+
# Create the directory structure
417+
src_dir = tmp_path / "src"
418+
pkg_dir = src_dir / "myproject"
419+
tasks_dir = pkg_dir / "tasks"
420+
tasks_dir.mkdir(parents=True)
421+
422+
# myproject is a regular package (has __init__.py)
423+
(pkg_dir / "__init__.py").write_text("")
424+
425+
# tasks is a namespace package (NO __init__.py)
426+
task_file = tasks_dir / "task_a.py"
427+
task_file.write_text(
428+
textwrap.dedent(
429+
"""
430+
EXECUTION_COUNT = 0
431+
432+
def on_import():
433+
global EXECUTION_COUNT
434+
EXECUTION_COUNT += 1
435+
436+
on_import()
437+
"""
438+
)
439+
)
440+
441+
# Add src/ to sys.path (simulating PYTHONPATH=src)
442+
monkeypatch.syspath_prepend(str(src_dir))
443+
444+
# Verify Python's import system would use "myproject.tasks.task_a"
445+
expected_module_name = "myproject.tasks.task_a"
446+
spec = importlib.util.find_spec(expected_module_name)
447+
assert spec is not None, (
448+
f"Python cannot find {expected_module_name!r} - test setup is wrong"
449+
)
450+
assert spec.origin == str(task_file)
451+
452+
# Now call pytask's import_path
453+
mod = import_path(task_file, root=tmp_path)
454+
455+
# The module name should match what Python's import system uses
456+
assert mod.__name__ == expected_module_name, (
457+
f"import_path() used module name {mod.__name__!r} but Python's import "
458+
f"system would use {expected_module_name!r}. This mismatch causes "
459+
f"double-imports when task files import each other."
460+
)
461+
462+
# The module should be registered in sys.modules under the correct name
463+
assert expected_module_name in sys.modules
464+
assert sys.modules[expected_module_name] is mod
465+
466+
# Importing via Python's standard mechanism should return the SAME module
467+
# (not re-execute the file)
468+
standard_import = importlib.import_module(expected_module_name)
469+
assert standard_import is mod, (
470+
"Python's import returned a different module object than import_path(). "
471+
"This means the file would be executed twice."
472+
)
473+
474+
# Verify the file was only executed once
475+
assert mod.EXECUTION_COUNT == 1, (
476+
f"Module was executed {mod.EXECUTION_COUNT} times instead of once. "
477+
f"Double-import detected!"
478+
)
479+
480+
# Cleanup
481+
sys.modules.pop(expected_module_name, None)
482+
sys.modules.pop("myproject.tasks", None)
483+
sys.modules.pop("myproject", None)

0 commit comments

Comments
 (0)