Skip to content

Commit

Permalink
143 Parse and Scan Dependencies from pyproject.toml (#238)
Browse files Browse the repository at this point in the history
* Deprecates DependencyType, as DependencySection already exists and covers the same concept

* Starts work on new pyproject.toml dep scanner

* Introduces MatchSpec to ProjectDependency type to parse and (eventually) leverage constraint information

* Adds exception handling for bad pyproject.toml files

* Adds MVP validation for the pyproject.toml scanner

* Adds support for using non-standard project file names

* Minor fix to dict.get() calls

* Adds initial pyproject.toml scanner tests

* Adds test coverage for pyproject.toml scanner

* Adds version constraint test for pyproject.toml scanner
  • Loading branch information
schuylermartin45 authored Nov 12, 2024
1 parent d19b1c7 commit 52558c9
Show file tree
Hide file tree
Showing 12 changed files with 484 additions and 30 deletions.
25 changes: 21 additions & 4 deletions conda_recipe_manager/scanner/dependency/base_dep_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,35 @@
from abc import ABCMeta, abstractmethod
from typing import NamedTuple

from conda_recipe_manager.types import DependencyType, MessageTable
from conda_recipe_manager.parser.dependency import DependencyData, DependencySection, dependency_data_from_str
from conda_recipe_manager.types import MessageTable


class ProjectDependency(NamedTuple):
    """
    A dependency found by scanning a software project's files.

    Not to be confused with the `conda_recipe_manager.parser.dependency.Dependency` type, which can be derived from
    recipe file information.
    """

    # Parsed dependency information (the dependency name and any optional version constraints).
    data: DependencyData
    # Type of dependency. This also correlates with the section this dependency should be put in, in a `conda`
    # recipe file.
    type: DependencySection


def new_project_dependency(s: str, t: DependencySection) -> ProjectDependency:
    """
    Builds a `ProjectDependency` from a raw dependency string.

    :param s: String containing the dependency name and optional version constraints.
    :param t: Type of dependency. This also correlates with the section this dependency should be put in, in a `conda`
        recipe file.
    :returns: A newly constructed `ProjectDependency` instance.
    """
    # The raw string is parsed once here so callers never construct the dependency data themselves.
    parsed_data = dependency_data_from_str(s)
    return ProjectDependency(data=parsed_data, type=t)


class BaseDependencyScanner(metaclass=ABCMeta):
Expand Down
24 changes: 17 additions & 7 deletions conda_recipe_manager/scanner/dependency/py_dep_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,13 @@
from pathlib import Path
from typing import Final

from conda_recipe_manager.scanner.dependency.base_dep_scanner import BaseDependencyScanner, ProjectDependency
from conda_recipe_manager.types import DependencyType, MessageCategory
from conda_recipe_manager.parser.dependency import DependencySection
from conda_recipe_manager.scanner.dependency.base_dep_scanner import (
BaseDependencyScanner,
ProjectDependency,
new_project_dependency,
)
from conda_recipe_manager.types import MessageCategory

# Table that maps import names that do not match the package name for common packages. See this StackOverflow post for
# more details:
Expand Down Expand Up @@ -117,10 +122,12 @@ def _scan_one_file(self, file: Path) -> set[ProjectDependency]:
# Most Python imports fall under the `run` section in the Conda recipe format. The major exception is
# any import found in test code.
dep_type = (
DependencyType.TEST if PythonDependencyScanner._is_likely_test_file(file) else DependencyType.RUN
DependencySection.TESTS
if PythonDependencyScanner._is_likely_test_file(file)
else DependencySection.RUN
)

deps.add(ProjectDependency(package_name, dep_type))
deps.add(new_project_dependency(package_name, dep_type))

return deps

Expand All @@ -145,7 +152,10 @@ def scan(self) -> set[ProjectDependency]:
# `RUN` dependencies are automatically added as `TEST` dependencies, so we need to filter if there are
# (effectively) duplicates
def _filter_test_duplicates(dep: ProjectDependency) -> bool:
if dep.type == DependencyType.TEST and ProjectDependency(dep.name, DependencyType.RUN) in all_imports:
if (
dep.type == DependencySection.TESTS
and ProjectDependency(dep.data, DependencySection.RUN) in all_imports
):
return False
return True

Expand All @@ -157,7 +167,7 @@ def _filter_test_duplicates(dep: ProjectDependency) -> bool:
# TODO filter unused imports

# Python is inherently a HOST and RUN dependency for all Python projects.
all_imports.add(ProjectDependency("python", DependencyType.HOST))
all_imports.add(ProjectDependency("python", DependencyType.RUN))
all_imports.add(new_project_dependency("python", DependencySection.HOST))
all_imports.add(new_project_dependency("python", DependencySection.RUN))

return all_imports
76 changes: 76 additions & 0 deletions conda_recipe_manager/scanner/dependency/pyproject_dep_scanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""
:Description: Reads dependencies from a `pyproject.toml` file.
"""

from __future__ import annotations

import tomllib
from pathlib import Path
from typing import Final, cast

from conda_recipe_manager.parser.dependency import DependencySection
from conda_recipe_manager.scanner.dependency.base_dep_scanner import (
BaseDependencyScanner,
ProjectDependency,
new_project_dependency,
)
from conda_recipe_manager.types import MessageCategory


class PyProjectDependencyScanner(BaseDependencyScanner):
    """
    Dependency Scanner class capable of scanning `pyproject.toml` files.
    """

    def __init__(self, src_dir: Path | str, project_file_name: str = "pyproject.toml"):
        """
        Constructs a `PyProjectDependencyScanner`.
        :param src_dir: Path to the directory that contains the project file to scan.
        :param project_file_name: (Optional) Allows for custom pyproject file names. Primarily used for testing,
            defaults to standard `pyproject.toml` name.
        """
        super().__init__()
        self._src_dir: Final[Path] = Path(src_dir)
        self._project_fn: Final[str] = project_file_name

    def scan(self) -> set[ProjectDependency]:
        """
        Reads the project file and collects the dependencies listed in its `project` section. A missing file, a TOML
        parsing failure, or a missing `project` section is recorded in the message table and yields an empty set.
        :returns: A set of unique dependencies found by the scanner, if any are found.
        """
        project_path = self._src_dir / self._project_fn
        try:
            with open(project_path, "rb") as f:
                data = cast(dict[str, dict[str, list[str] | dict[str, list[str]]]], tomllib.load(f))
        except FileNotFoundError:
            self._msg_tbl.add_message(MessageCategory.EXCEPTION, f"`{self._project_fn}` file not found.")
            return set()
        except tomllib.TOMLDecodeError:
            self._msg_tbl.add_message(MessageCategory.EXCEPTION, f"Could not parse `{self._project_fn}` file.")
            return set()

        # NOTE: There is a `validate-pyproject` library hosted on `conda-forge`, but it is marked as "experimental" by
        # its maintainers. Given that and that we only read a small portion of the file, we only validate what we use.
        if "project" not in data:
            self._msg_tbl.add_message(
                MessageCategory.ERROR, f"`{self._project_fn}` file is missing a `project` section."
            )
            return set()

        project = data["project"]

        # NOTE: The dependency constraint system used in `pyproject.toml` appears to be compatible with `conda`'s
        # `MatchSpec` object. For now, dependencies that can't be parsed with `MatchSpec` will store the raw string in
        # a `.name` field.
        # TODO Future, consider handling Environment Markers:
        # https://packaging.python.org/en/latest/specifications/dependency-specifiers/#environment-markers
        deps: set[ProjectDependency] = set()
        for dep_name in cast(list[str], project.get("dependencies", [])):
            deps.add(new_project_dependency(dep_name, DependencySection.RUN))

        # Optional dependencies are stored in a dictionary, where the key is the "package extra" name and the value is
        # a dependency list. For example: {'dev': ['pytest'], 'conda_build': ['conda-build']}
        for dep_lst in cast(dict[str, list[str]], project.get("optional-dependencies", {})).values():
            for dep_name in dep_lst:
                deps.add(new_project_dependency(dep_name, DependencySection.RUN_CONSTRAINTS))

        return deps
11 changes: 0 additions & 11 deletions conda_recipe_manager/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,6 @@ def __new__(cls) -> SentinelType:
return _schema_type_singleton


class DependencyType(StrEnum):
"""
Enumerates the dependency categories found in Conda recipe files.
"""

BUILD = auto()
HOST = auto()
RUN = auto()
TEST = auto()


class MessageCategory(StrEnum):
"""
Categories to classify messages into.
Expand Down
17 changes: 9 additions & 8 deletions tests/scanner/dependency/test_py_dep_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
from typing import Final, cast

import pytest
from conda.models.match_spec import MatchSpec
from pyfakefs.fake_filesystem import FakeFilesystem

from conda_recipe_manager.parser.dependency import DependencySection
from conda_recipe_manager.scanner.dependency.base_dep_scanner import ProjectDependency
from conda_recipe_manager.scanner.dependency.py_dep_scanner import PythonDependencyScanner
from conda_recipe_manager.types import DependencyType
from tests.file_loading import get_test_path


Expand All @@ -20,13 +21,13 @@
(
"dummy_py_project_01",
{
ProjectDependency("conda_recipe_manager", DependencyType.RUN),
ProjectDependency("matplotlib", DependencyType.RUN), # Two imports on one line
ProjectDependency("networkx", DependencyType.RUN), # Two imports on one line
ProjectDependency("python", DependencyType.HOST),
ProjectDependency("python", DependencyType.RUN),
ProjectDependency("pyyaml", DependencyType.TEST),
ProjectDependency("requests", DependencyType.RUN), # Found in source and test code.
ProjectDependency(MatchSpec("conda_recipe_manager"), DependencySection.RUN),
ProjectDependency(MatchSpec("matplotlib"), DependencySection.RUN), # Two imports on one line
ProjectDependency(MatchSpec("networkx"), DependencySection.RUN), # Two imports on one line
ProjectDependency(MatchSpec("python"), DependencySection.HOST),
ProjectDependency(MatchSpec("python"), DependencySection.RUN),
ProjectDependency(MatchSpec("pyyaml"), DependencySection.TESTS),
ProjectDependency(MatchSpec("requests"), DependencySection.RUN), # Found in source and test code.
},
),
],
Expand Down
116 changes: 116 additions & 0 deletions tests/scanner/dependency/test_pyproject_dep_scanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""
:Description: Provides unit tests for the `PyProjectDependencyScanner` class.
"""

import pytest
from conda.models.match_spec import MatchSpec

from conda_recipe_manager.parser.dependency import DependencySection
from conda_recipe_manager.scanner.dependency.base_dep_scanner import ProjectDependency
from conda_recipe_manager.scanner.dependency.pyproject_dep_scanner import PyProjectDependencyScanner
from conda_recipe_manager.types import MessageCategory
from tests.file_loading import get_test_path


@pytest.mark.parametrize(
    "project_fn,expected",
    [
        (
            "crm_mock_pyproject.toml",
            {
                ProjectDependency(MatchSpec(spec), DependencySection.RUN)
                for spec in (
                    "click",
                    "jinja2",
                    "pyyaml",
                    "jsonschema",
                    "requests",
                    "gitpython",
                    "networkx",
                    "matplotlib",
                    "pygraphviz",
                )
            }
            # Optional dependencies
            | {
                ProjectDependency(MatchSpec(spec), DependencySection.RUN_CONSTRAINTS)
                for spec in ("pytest", "conda-build")
            },
        ),
        (
            "crm_mock_pyproject_version_constraints.toml",
            {
                ProjectDependency(MatchSpec(spec), DependencySection.RUN)
                for spec in (
                    "click >= 1.2",
                    "jinja2",
                    "pyyaml",
                    "jsonschema",
                    "requests >= 2.8.1, == 2.8.*",
                    "gitpython",
                    "networkx",
                    "matplotlib",
                    "pygraphviz",
                )
            }
            # Optional dependencies
            | {
                ProjectDependency(MatchSpec(spec), DependencySection.RUN_CONSTRAINTS)
                for spec in ("pytest ~= 8.1", "conda-build")
            },
        ),
        (
            "crm_mock_pyproject_only_deps.toml",
            {
                ProjectDependency(MatchSpec(spec), DependencySection.RUN)
                for spec in (
                    "click",
                    "jinja2",
                    "pyyaml",
                    "jsonschema",
                    "requests",
                    "gitpython",
                    "networkx",
                    "matplotlib",
                    "pygraphviz",
                )
            },
        ),
        (
            "crm_mock_pyproject_only_optional.toml",
            {
                ProjectDependency(MatchSpec(spec), DependencySection.RUN_CONSTRAINTS)
                for spec in ("pytest", "conda-build")
            },
        ),
    ],
)
def test_scan(project_fn: str, expected: set[ProjectDependency]) -> None:
    """
    Validates the set of dependencies scanned out of a mocked-out `pyproject.toml` file.
    :param project_fn: Name of the dummy `pyproject.toml` file to use.
    :param expected: Expected value
    """
    fixture_dir = get_test_path() / "pyproject_toml"
    scanner = PyProjectDependencyScanner(fixture_dir, project_fn)
    found = scanner.scan()
    assert found == expected


def test_scan_missing_pyproject() -> None:
    """
    Ensures the scanner returns nothing and logs an exception message when the `pyproject.toml` file does not exist.
    """
    fixture_dir = get_test_path() / "pyproject_toml"
    scanner = PyProjectDependencyScanner(fixture_dir, "the_limit_dne.toml")
    found = scanner.scan()
    assert found == set()
    logged = scanner.get_message_table().get_messages(MessageCategory.EXCEPTION)
    assert logged == ["`the_limit_dne.toml` file not found."]


def test_scan_corrupt_pyproject() -> None:
    """
    Ensures the scanner returns nothing and logs an exception message when the `pyproject.toml` file cannot be parsed.
    """
    fixture_dir = get_test_path() / "pyproject_toml"
    scanner = PyProjectDependencyScanner(fixture_dir, "corrupt_pyproject.toml")
    found = scanner.scan()
    assert found == set()
    logged = scanner.get_message_table().get_messages(MessageCategory.EXCEPTION)
    assert logged == ["Could not parse `corrupt_pyproject.toml` file."]


def test_scan_missing_project_pyproject() -> None:
    """
    Ensures the scanner returns nothing and logs an error message when the `pyproject.toml` file has no `project`
    section.
    """
    fixture_dir = get_test_path() / "pyproject_toml"
    scanner = PyProjectDependencyScanner(fixture_dir, "no_project_pyproject.toml")
    found = scanner.scan()
    assert found == set()
    logged = scanner.get_message_table().get_messages(MessageCategory.ERROR)
    assert logged == ["`no_project_pyproject.toml` file is missing a `project` section."]
5 changes: 5 additions & 0 deletions tests/test_aux_files/pyproject_toml/corrupt_pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[project
dependencies = [
"click",
"jsonschema"
]
Loading

0 comments on commit 52558c9

Please sign in to comment.