From fbbcaa81d9d89321000d4c23ad46f62cc4f4e574 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 8 Oct 2023 11:05:32 +0200 Subject: [PATCH 01/10] fix everything. --- .pre-commit-config.yaml | 49 ++------- CHANGES.md | 6 +- environment.yml | 4 +- setup.cfg | 3 +- src/pytask_stata/collect.py | 186 ++++++++++++++++++++------------ src/pytask_stata/execute.py | 10 +- src/pytask_stata/parametrize.py | 15 --- src/pytask_stata/plugin.py | 2 - src/pytask_stata/shared.py | 6 +- tests/conftest.py | 63 ++++++++++- tests/test_config.py | 4 +- tests/test_execute.py | 4 +- tests/test_parallel.py | 110 ------------------- tests/test_parametrize.py | 73 ------------- tox.ini | 3 +- 15 files changed, 216 insertions(+), 322 deletions(-) delete mode 100644 src/pytask_stata/parametrize.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 16f4341..ed2e470 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,8 +3,7 @@ repos: rev: v4.4.0 hooks: - id: check-added-large-files - args: - - --maxkb=25 + args: [--maxkb=25] - id: check-case-conflict - id: check-merge-conflict - id: check-vcs-permalinks @@ -14,9 +13,7 @@ repos: - id: fix-byte-order-marker - id: mixed-line-ending - id: no-commit-to-branch - args: - - --branch - - main + args: [--branch, main] - id: trailing-whitespace - repo: https://github.com/pre-commit/pygrep-hooks rev: v1.10.0 @@ -31,25 +28,11 @@ repos: rev: v3.12.0 hooks: - id: reorder-python-imports - args: - - --py38-plus - - --add-import - - from __future__ import annotations + args: [--py38-plus, --add-import, from __future__ import annotations] - repo: https://github.com/asottile/setup-cfg-fmt rev: v2.5.0 hooks: - id: setup-cfg-fmt -- repo: https://github.com/PyCQA/docformatter - rev: v1.7.5 - hooks: - - id: docformatter - args: - - --in-place - - --wrap-summaries - - '88' - - --wrap-descriptions - - '88' - - --blank - repo: https://github.com/psf/black rev: 23.9.1 hooks: @@ -62,18 +45,12 @@ repos: rev: v1.21.0 hooks: - id: refurb - args: - - --ignore - - FURB126 + args: [--ignore, FURB126] - repo: https://github.com/econchick/interrogate rev: 1.5.0 hooks: - id: interrogate - args: - - -v - - --fail-under=40 - - src - - tests + args: [-v, --fail-under=40, src, tests] - repo: https://github.com/executablebooks/mdformat rev: 0.7.17 hooks: @@ -81,9 +58,7 @@ repos: additional_dependencies: - mdformat-gfm - mdformat-black - args: - - --wrap - - '88' + args: [--wrap, '88'] - repo: https://github.com/codespell-project/codespell rev: v2.2.6 hooks: @@ -92,23 +67,17 @@ repos: rev: v1.5.1 hooks: - id: mypy - args: - - --no-strict-optional - - --ignore-missing-imports additional_dependencies: - attrs - - click + - pytask - types-setuptools pass_filenames: false - repo: https://github.com/mgedmin/check-manifest rev: '0.49' hooks: - id: check-manifest - args: - - --no-build-isolation - additional_dependencies: - - setuptools-scm - - toml + args: [--no-build-isolation] + additional_dependencies: [setuptools-scm, toml] - repo: meta hooks: - id: check-hooks-apply diff --git a/CHANGES.md b/CHANGES.md index 7d86f66..4ea6f48 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,7 +5,11 @@ chronological order. Releases follow [semantic versioning](https://semver.org/) releases are available on [PyPI](https://pypi.org/project/pytask-stata) and [Anaconda.org](https://anaconda.org/conda-forge/pytask-stata). -## 0.3.0 - 2023-xx-xx +## 0.4.0 - 2023-10-08 + +- {pull}`31` makes pytask-stata compatible with pytask v0.4.0. + +## 0.3.0 - 2023-01-23 - {pull}`24` adds ruff and refurb. - {pull}`25` adds docformatter. diff --git a/environment.yml b/environment.yml index 0c59547..c048d09 100644 --- a/environment.yml +++ b/environment.yml @@ -11,8 +11,8 @@ dependencies: - toml # Package dependencies - - pytask <0.4 - - pytask-parallel <0.4 + - pytask >=0.4.0 + - pytask-parallel >=0.4.0 # Misc - black diff --git a/setup.cfg b/setup.cfg index 74d0861..115f6f1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,7 +25,8 @@ project_urls = packages = find: install_requires = click - pytask>=0.3 + pluggy>=1.0.0 + pytask>=0.4.0 python_requires = >=3.8 include_package_data = True package_dir = =src diff --git a/src/pytask_stata/collect.py b/src/pytask_stata/collect.py index d65ee12..e11880c 100644 --- a/src/pytask_stata/collect.py +++ b/src/pytask_stata/collect.py @@ -1,35 +1,41 @@ """Collect tasks.""" from __future__ import annotations -import functools import subprocess -from types import FunctionType +import warnings from typing import Any -from typing import TYPE_CHECKING -from pytask import depends_on from pytask import has_mark from pytask import hookimpl +from pytask import is_task_function from pytask import Mark -from pytask import parse_nodes -from pytask import produces +from pytask import NodeInfo +from pytask import parse_dependencies_from_task_function +from pytask import parse_products_from_task_function +from pytask import PathNode +from pytask import PTask +from pytask import PythonNode from pytask import remove_marks from pytask import Session from pytask import Task +from pytask import TaskWithoutPath from pytask_stata.shared import convert_task_id_to_name_of_log_file from pytask_stata.shared import stata +from pathlib import Path -if TYPE_CHECKING: - from pathlib import Path def run_stata_script( - executable: str, script: Path, options: list[str], log_name: list[str], cwd: Path + _executable: str, + _script: Path, + _options: list[str], + _log_name: list[str], + _cwd: Path, ) -> None: """Run an R script.""" - cmd = [executable, "-e", "do", script.as_posix(), *options, *log_name] + cmd = [_executable, "-e", "do", _script.as_posix(), *_options, *_log_name] print("Executing " + " ".join(cmd) + ".") # noqa: T201 - subprocess.run(cmd, cwd=cwd, check=True) # noqa: S603 + subprocess.run(cmd, cwd=_cwd, check=True) # noqa: S603 @hookimpl @@ -41,11 +47,11 @@ def pytask_collect_task( if ( (name.startswith("task_") or has_mark(obj, "task")) - and callable(obj) + and is_task_function(obj) and has_mark(obj, "stata") ): + # Parse the @pytask.mark.stata decorator. obj, marks = remove_marks(obj, "stata") - if len(marks) > 1: raise ValueError( f"Task {name!r} has multiple @pytask.mark.stata marks, but only one is " @@ -57,47 +63,120 @@ def pytask_collect_task( obj.pytask_meta.markers.append(mark) - dependencies = parse_nodes(session, path, name, obj, depends_on) - products = parse_nodes(session, path, name, obj, produces) + # Collect the nodes in @pytask.mark.julia and validate them. + path_nodes = Path.cwd() if path is None else path.parent - markers = obj.pytask_meta.markers if hasattr(obj, "pytask_meta") else [] - kwargs = obj.pytask_meta.kwargs if hasattr(obj, "pytask_meta") else {} - - task = Task( - base_name=name, - path=path, - function=_copy_func(run_stata_script), # type: ignore[arg-type] - depends_on=dependencies, - produces=products, - markers=markers, - kwargs=kwargs, - ) + if isinstance(script, str): + warnings.warn( + "Passing a string to the @pytask.mark.stata parameter 'script' is " + "deprecated. Please, use a pathlib.Path instead.", + stacklevel=1, + ) + script = Path(script) script_node = session.hook.pytask_collect_node( - session=session, path=path, node=script + session=session, + path=path_nodes, + node_info=NodeInfo( + arg_name="script", path=(), value=script, task_path=path, task_name=name + ), + ) + + if not (isinstance(script_node, PathNode) and script_node.path.suffix == ".do"): + raise ValueError( + "The 'script' keyword of the @pytask.mark.stata decorator must point " + f"to a file with the .do suffix, but it is {script_node}." + ) + + options_node = session.hook.pytask_collect_node( + session=session, + path=path_nodes, + node_info=NodeInfo( + arg_name="_options", + path=(), + value=options, + task_path=path, + task_name=name, + ), + ) + + executable_node = session.hook.pytask_collect_node( + session=session, + path=path_nodes, + node_info=NodeInfo( + arg_name="_executable", + path=(), + value=session.config["stata"], + task_path=path, + task_name=name, + ), + ) + + cwd_node = session.hook.pytask_collect_node( + session=session, + path=path_nodes, + node_info=NodeInfo( + arg_name="_cwd", + path=(), + value=path.parent, + task_path=path, + task_name=name, + ), + ) + + dependencies = parse_dependencies_from_task_function( + session, path, name, path_nodes, obj ) + products = parse_products_from_task_function( + session, path, name, path_nodes, obj + ) + + # Add script + dependencies["_script"] = script_node + dependencies["_options"] = options_node + dependencies["_cwd"] = cwd_node + dependencies["_executable"] = executable_node + + markers = obj.pytask_meta.markers if hasattr(obj, "pytask_meta") else [] - if isinstance(task.depends_on, dict): - task.depends_on["__script"] = script_node + task: PTask + if path is None: + task = TaskWithoutPath( + name=name, + function=run_stata_script, + depends_on=dependencies, + produces=products, + markers=markers, + ) else: - task.depends_on = {0: task.depends_on, "__script": script_node} + task = Task( + base_name=name, + path=path, + function=run_stata_script, + depends_on=dependencies, + produces=products, + markers=markers, + ) + # Add log_name node that depends on the task id. if session.config["platform"] == "win32": - log_name = convert_task_id_to_name_of_log_file(task.short_name) + log_name = convert_task_id_to_name_of_log_file(task) log_name_arg = [f"-{log_name}"] else: log_name_arg = [] - stata_function = functools.partial( - task.function, - executable=session.config["stata"], - script=task.depends_on["__script"].path, - options=options, - log_name=log_name_arg, - cwd=task.path.parent, + log_name_node = session.hook.pytask_collect_node( + session=session, + path=path_nodes, + node_info=NodeInfo( + arg_name="_log_name", + path=(), + value=PythonNode(value=log_name_arg), + task_path=path, + task_name=name, + ), ) - - task.function = stata_function + task.depends_on["_log_name"] = log_name_node return task return None @@ -111,28 +190,3 @@ def _parse_stata_mark(mark: Mark) -> Mark: mark = Mark("stata", (), parsed_kwargs) return mark - - -def _copy_func(func: FunctionType) -> FunctionType: - """Create a copy of a function. - - Based on https://stackoverflow.com/a/13503277/7523785. - - Example - ------- - >>> def _func(): pass - >>> copied_func = _copy_func(_func) - >>> _func is copied_func - False - - """ - new_func = FunctionType( - func.__code__, - func.__globals__, - name=func.__name__, - argdefs=func.__defaults__, - closure=func.__closure__, - ) - new_func = functools.update_wrapper(new_func, func) - new_func.__kwdefaults__ = func.__kwdefaults__ - return new_func diff --git a/src/pytask_stata/execute.py b/src/pytask_stata/execute.py index db12876..6a2e710 100644 --- a/src/pytask_stata/execute.py +++ b/src/pytask_stata/execute.py @@ -5,14 +5,14 @@ from pytask import has_mark from pytask import hookimpl +from pytask import PTask from pytask import Session -from pytask import Task from pytask_stata.shared import convert_task_id_to_name_of_log_file from pytask_stata.shared import STATA_COMMANDS @hookimpl -def pytask_execute_task_setup(session: Session, task: Task) -> None: +def pytask_execute_task_setup(session: Session, task: PTask) -> None: """Check if Stata is found on the PATH.""" if has_mark(task, "stata") and session.config["stata"] is None: raise RuntimeError( @@ -24,7 +24,7 @@ def pytask_execute_task_setup(session: Session, task: Task) -> None: @hookimpl -def pytask_execute_task_teardown(session: Session, task: Task) -> None: +def pytask_execute_task_teardown(session: Session, task: PTask) -> None: """Check if the log file contains no error code. Stata has the weird behavior of always returning an exit code of 0 even if an error @@ -36,10 +36,10 @@ def pytask_execute_task_teardown(session: Session, task: Task) -> None: """ if has_mark(task, "stata"): if session.config["platform"] == "win32": - log_name = convert_task_id_to_name_of_log_file(task.short_name) + log_name = convert_task_id_to_name_of_log_file(task) path_to_log = task.path.with_name(log_name).with_suffix(".log") else: - node = task.depends_on["__script"] + node = task.depends_on["_script"] path_to_log = node.path.with_suffix(".log") n_lines = session.config["stata_check_log_lines"] diff --git a/src/pytask_stata/parametrize.py b/src/pytask_stata/parametrize.py deleted file mode 100644 index 3afec0e..0000000 --- a/src/pytask_stata/parametrize.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Parametrize tasks.""" -from __future__ import annotations - -from typing import Any -from typing import Callable - -import pytask -from _pytask.config import hookimpl - - -@hookimpl -def pytask_parametrize_kwarg_to_marker(obj: Callable[..., Any], kwargs: Any) -> None: - """Attach parametrized stata arguments to the function with a marker.""" - if callable(obj) and "stata" in kwargs: - pytask.mark.stata(**kwargs.pop("stata"))(obj) diff --git a/src/pytask_stata/plugin.py b/src/pytask_stata/plugin.py index a8f4e46..bf17cd2 100644 --- a/src/pytask_stata/plugin.py +++ b/src/pytask_stata/plugin.py @@ -8,7 +8,6 @@ from pytask_stata import collect from pytask_stata import config from pytask_stata import execute -from pytask_stata import parametrize if TYPE_CHECKING: from pluggy import PluginManager @@ -21,4 +20,3 @@ def pytask_add_hooks(pm: PluginManager) -> None: pm.register(collect) pm.register(config) pm.register(execute) - pm.register(parametrize) diff --git a/src/pytask_stata/shared.py b/src/pytask_stata/shared.py index f36c104..a1c7b77 100644 --- a/src/pytask_stata/shared.py +++ b/src/pytask_stata/shared.py @@ -7,7 +7,10 @@ from typing import Sequence from typing import TYPE_CHECKING + + if TYPE_CHECKING: + from pytask import PTask from pathlib import Path @@ -58,7 +61,7 @@ def stata( return script, options -def convert_task_id_to_name_of_log_file(id_: str) -> str: +def convert_task_id_to_name_of_log_file(task: PTask) -> str: """Convert task to id to name of log file. If one passes the complete task id as the log file name, Stata would remove parent @@ -78,6 +81,7 @@ def convert_task_id_to_name_of_log_file(id_: str) -> str: 'task_example_py_task_example[arg1]' """ + id_ = getattr(task, "short_name", task.name) id_without_parent_directories = id_.rsplit("/")[-1] converted_id = id_without_parent_directories.replace(".", "_").replace("::", "_") return converted_id diff --git a/tests/conftest.py b/tests/conftest.py index 2cb9a8c..63ab6d7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,13 @@ from __future__ import annotations import shutil +from typing import Callable import pytest from click.testing import CliRunner from pytask_stata.config import STATA_COMMANDS +import sys +from contextlib import contextmanager needs_stata = pytest.mark.skipif( @@ -16,6 +19,64 @@ ) +class SysPathsSnapshot: + """A snapshot for sys.path.""" + + def __init__(self) -> None: + self.__saved = list(sys.path), list(sys.meta_path) + + def restore(self) -> None: + sys.path[:], sys.meta_path[:] = self.__saved + + +class SysModulesSnapshot: + """A snapshot for sys.modules.""" + + def __init__(self, preserve: Callable[[str], bool] | None = None) -> None: + self.__preserve = preserve + self.__saved = dict(sys.modules) + + def restore(self) -> None: + if self.__preserve: + self.__saved.update( + (k, m) for k, m in sys.modules.items() if self.__preserve(k) + ) + sys.modules.clear() + sys.modules.update(self.__saved) + + +@contextmanager +def restore_sys_path_and_module_after_test_execution(): + sys_path_snapshot = SysPathsSnapshot() + sys_modules_snapshot = SysModulesSnapshot() + yield + sys_modules_snapshot.restore() + sys_path_snapshot.restore() + + +@pytest.fixture(autouse=True) +def _restore_sys_path_and_module_after_test_execution(): + """Restore sys.path and sys.modules after every test execution. + + This fixture became necessary because most task modules in the tests are named + `task_example`. Since the change in #424, the same module is not reimported which + solves errors with parallelization. At the same time, modules with the same name in + the tests are overshadowing another and letting tests fail. + + The changes to `sys.path` might not be necessary to restore, but we do it anyways. + + """ + with restore_sys_path_and_module_after_test_execution(): + yield + + +class CustomCliRunner(CliRunner): + def invoke(self, *args, **kwargs): + """Restore sys.path and sys.modules after an invocation.""" + with restore_sys_path_and_module_after_test_execution(): + return super().invoke(*args, **kwargs) + + @pytest.fixture() def runner(): - return CliRunner() + return CustomCliRunner() diff --git a/tests/test_config.py b/tests/test_config.py index 95a1121..2a00be9 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,12 +1,12 @@ from __future__ import annotations import pytest -from pytask import main +from pytask import build @pytest.mark.end_to_end() def test_marker_is_configured(tmp_path): - session = main({"paths": tmp_path}) + session = build(paths=tmp_path) assert "stata" in session.config assert "stata" in session.config["markers"] diff --git a/tests/test_execute.py b/tests/test_execute.py index 5a94850..8e05968 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -8,7 +8,7 @@ import pytest from pytask import cli from pytask import ExitCode -from pytask import main +from pytask import build from pytask import Mark from pytask import Session from pytask import Task @@ -120,7 +120,7 @@ def task_run_do_file(): "pytask_stata.config.shutil.which", lambda x: None # noqa: ARG005 ) - session = main({"paths": tmp_path}) + session = build(paths=tmp_path) assert session.exit_code == ExitCode.FAILED assert isinstance(session.execution_reports[0].exc_info[1], RuntimeError) diff --git a/tests/test_parallel.py b/tests/test_parallel.py index 750411e..ca34f56 100644 --- a/tests/test_parallel.py +++ b/tests/test_parallel.py @@ -2,7 +2,6 @@ from __future__ import annotations import textwrap -import time import pytest from pytask import cli @@ -23,46 +22,6 @@ ) -@needs_stata -@pytest.mark.end_to_end() -def test_parallel_parametrization_over_source_files_w_parametrize(runner, tmp_path): - source = """ - import pytask - - @pytask.mark.parametrize( - "stata, produces", [( - {"script": "script_1.do"}, "1.dta"), ({"script": "script_2.do"}, "2.dta") - ] - ) - def task_execute_do_file(): - pass - """ - tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) - - for i in range(1, 3): - do_file = f""" - sleep 4000 - sysuse auto, clear - save {i} - """ - tmp_path.joinpath(f"script_{i}.do").write_text(textwrap.dedent(do_file)) - - start = time.time() - result = runner.invoke(cli, [tmp_path.as_posix()]) - assert result.exit_code == ExitCode.OK - duration_normal = time.time() - start - - for name in ("1.dta", "2.dta"): - tmp_path.joinpath(name).unlink() - - start = time.time() - result = runner.invoke(cli, [tmp_path.as_posix(), "-n", 2]) - assert result.exit_code == ExitCode.OK - duration_parallel = time.time() - start - - assert duration_parallel < duration_normal - - @needs_stata @pytest.mark.end_to_end() def test_parallel_parametrization_over_source_files_w_loop(runner, tmp_path): @@ -80,75 +39,19 @@ def task_execute_do_file(): tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) do_file = """ - sleep 4000 sysuse auto, clear save 1 """ tmp_path.joinpath("script_1.do").write_text(textwrap.dedent(do_file)) do_file = """ - sleep 4000 sysuse auto, clear save 2 """ tmp_path.joinpath("script_2.do").write_text(textwrap.dedent(do_file)) - start = time.time() - result = runner.invoke(cli, [tmp_path.as_posix()]) - assert result.exit_code == ExitCode.OK - duration_normal = time.time() - start - - for name in ("1.dta", "2.dta"): - tmp_path.joinpath(name).unlink() - - start = time.time() - result = runner.invoke(cli, [tmp_path.as_posix(), "-n", 2]) - assert result.exit_code == ExitCode.OK - duration_parallel = time.time() - start - - assert duration_parallel < duration_normal - - -@needs_stata -@pytest.mark.end_to_end() -def test_parallel_parametrization_over_source_file_w_parametrize(runner, tmp_path): - source = """ - import pytask - - @pytask.mark.parametrize( - "produces, stata", - [ - ("output_1.dta", {"script": "script.do", "options": ("output_1",)}), - ("output_2.dta", {"script": "script.do", "options": ("output_2",)}) - ], - ) - def task_execute_do_file(): - pass - """ - tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) - - do_file = """ - sleep 4000 - sysuse auto, clear - args produces - save "`produces'" - """ - tmp_path.joinpath("script.do").write_text(textwrap.dedent(do_file)) - - start = time.time() - result = runner.invoke(cli, [tmp_path.as_posix()]) - assert result.exit_code == ExitCode.OK - duration_normal = time.time() - start - - for name in ("output_1.dta", "output_2.dta"): - tmp_path.joinpath(name).unlink() - - start = time.time() result = runner.invoke(cli, [tmp_path.as_posix(), "-n", 2]) assert result.exit_code == ExitCode.OK - duration_parallel = time.time() - start - - assert duration_parallel < duration_normal @needs_stata @@ -168,24 +71,11 @@ def task_execute_do_file(): tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) do_file = """ - sleep 4000 sysuse auto, clear args produces save "`produces'" """ tmp_path.joinpath("script.do").write_text(textwrap.dedent(do_file)) - start = time.time() - result = runner.invoke(cli, [tmp_path.as_posix()]) - assert result.exit_code == ExitCode.OK - duration_normal = time.time() - start - - for name in ("output_1.dta", "output_2.dta"): - tmp_path.joinpath(name).unlink() - - start = time.time() result = runner.invoke(cli, [tmp_path.as_posix(), "-n", 2]) assert result.exit_code == ExitCode.OK - duration_parallel = time.time() - start - - assert duration_parallel < duration_normal diff --git a/tests/test_parametrize.py b/tests/test_parametrize.py index 9ac0009..6e33f83 100644 --- a/tests/test_parametrize.py +++ b/tests/test_parametrize.py @@ -10,36 +10,6 @@ from tests.conftest import needs_stata -@needs_stata -@pytest.mark.end_to_end() -def test_parametrized_execution_of_do_file_w_parametrize(runner, tmp_path): - task_source = """ - import pytask - - @pytask.mark.parametrize( - "stata, produces", [( - {"script": "script_1.do"}, "1.dta"), ({"script": "script_2.do"}, "2.dta") - ] - ) - def task_execute_do_file(): - pass - """ - tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(task_source)) - - for i in range(1, 3): - do_file = f""" - sysuse auto, clear - save {i} - """ - tmp_path.joinpath(f"script_{i}.do").write_text(textwrap.dedent(do_file)) - - result = runner.invoke(cli, [tmp_path.as_posix()]) - - assert result.exit_code == ExitCode.OK - assert tmp_path.joinpath("1.dta").exists() - assert tmp_path.joinpath("2.dta").exists() - - @needs_stata @pytest.mark.end_to_end() def test_parametrized_execution_of_do_file_w_loop(runner, tmp_path): @@ -70,49 +40,6 @@ def task_execute_do_file(): assert tmp_path.joinpath("2.dta").exists() -@needs_stata -@pytest.mark.end_to_end() -def test_parametrize_command_line_options_w_parametrize(runner, tmp_path): - task_source = """ - import pytask - - @pytask.mark.parametrize( - "produces, stata", - [ - ("output_1.dta", {"script": "script.do", "options": ("output_1",)}), - ("output_2.dta", {"script": "script.do", "options": ("output_2",)}) - ], - ) - def task_execute_do_file(): - pass - """ - tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(task_source)) - - latex_source = """ - sysuse auto, clear - args produces - save "`produces'" - """ - tmp_path.joinpath("script.do").write_text(textwrap.dedent(latex_source)) - - result = runner.invoke(cli, [tmp_path.as_posix(), "--stata-keep-log"]) - - assert result.exit_code == ExitCode.OK - assert tmp_path.joinpath("output_1.dta").exists() - assert tmp_path.joinpath("output_2.dta").exists() - - # Test that log files with different names are produced. - if sys.platform == "win32": - assert tmp_path.joinpath( - "task_example_py_task_execute_do_file[output_1_dta-stata0].log" - ).exists() - assert tmp_path.joinpath( - "task_example_py_task_execute_do_file[output_2_dta-stata1].log" - ).exists() - else: - assert tmp_path.joinpath("script.log").exists() - - @needs_stata @pytest.mark.end_to_end() def test_parametrize_command_line_options_w_loop(runner, tmp_path): diff --git a/tox.ini b/tox.ini index 0486889..593aa91 100644 --- a/tox.ini +++ b/tox.ini @@ -9,7 +9,8 @@ conda_channels = conda-forge nodefaults conda_deps = - pytask =0.3 + pytask >=0.4 + pytask-parallel >=0.4 pytest pytest-cov pytest-xdist From 2686e16c24eebc3021025b5f1fb58be43e9433c5 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sat, 30 Mar 2024 20:13:54 +0100 Subject: [PATCH 02/10] Add test. --- src/pytask_stata/collect.py | 3 +-- src/pytask_stata/execute.py | 8 ++++++-- src/pytask_stata/shared.py | 8 ++++++-- tests/conftest.py | 4 ++-- tests/test_execute.py | 32 +++++++++++++++++++++++++++++++- 5 files changed, 46 insertions(+), 9 deletions(-) diff --git a/src/pytask_stata/collect.py b/src/pytask_stata/collect.py index e11880c..9bed2a3 100644 --- a/src/pytask_stata/collect.py +++ b/src/pytask_stata/collect.py @@ -3,6 +3,7 @@ import subprocess import warnings +from pathlib import Path from typing import Any from pytask import has_mark @@ -21,8 +22,6 @@ from pytask import TaskWithoutPath from pytask_stata.shared import convert_task_id_to_name_of_log_file from pytask_stata.shared import stata -from pathlib import Path - def run_stata_script( diff --git a/src/pytask_stata/execute.py b/src/pytask_stata/execute.py index 6a2e710..abec892 100644 --- a/src/pytask_stata/execute.py +++ b/src/pytask_stata/execute.py @@ -1,12 +1,13 @@ """Execute tasks.""" from __future__ import annotations +from pathlib import Path import re from pytask import has_mark from pytask import hookimpl from pytask import PTask -from pytask import Session +from pytask import Session, PTaskWithPath from pytask_stata.shared import convert_task_id_to_name_of_log_file from pytask_stata.shared import STATA_COMMANDS @@ -37,7 +38,10 @@ def pytask_execute_task_teardown(session: Session, task: PTask) -> None: if has_mark(task, "stata"): if session.config["platform"] == "win32": log_name = convert_task_id_to_name_of_log_file(task) - path_to_log = task.path.with_name(log_name).with_suffix(".log") + if isinstance(task, PTaskWithPath): + path_to_log = task.path.with_name(log_name).with_suffix(".log") + else: + path_to_log = Path.cwd(log_name).with_name(log_name).with_suffix(".log") else: node = task.depends_on["_script"] path_to_log = node.path.with_suffix(".log") diff --git a/src/pytask_stata/shared.py b/src/pytask_stata/shared.py index a1c7b77..180dac0 100644 --- a/src/pytask_stata/shared.py +++ b/src/pytask_stata/shared.py @@ -8,7 +8,6 @@ from typing import TYPE_CHECKING - if TYPE_CHECKING: from pytask import PTask from pathlib import Path @@ -83,7 +82,12 @@ def convert_task_id_to_name_of_log_file(task: PTask) -> str: """ id_ = getattr(task, "short_name", task.name) id_without_parent_directories = id_.rsplit("/")[-1] - converted_id = id_without_parent_directories.replace(".", "_").replace("::", "_") + converted_id = ( + id_without_parent_directories.replace(".", "_") + .replace("::", "_") + .replace("<", "") + .replace(">", "") + ) return converted_id diff --git a/tests/conftest.py b/tests/conftest.py index 63ab6d7..6b5c44a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,13 +1,13 @@ from __future__ import annotations import shutil +import sys +from contextlib import contextmanager from typing import Callable import pytest from click.testing import CliRunner from pytask_stata.config import STATA_COMMANDS -import sys -from contextlib import contextmanager needs_stata = pytest.mark.skipif( diff --git a/tests/test_execute.py b/tests/test_execute.py index 8e05968..f5eb9f4 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -6,9 +6,9 @@ from pathlib import Path import pytest +from pytask import build from pytask import cli from pytask import ExitCode -from pytask import build from pytask import Mark from pytask import Session from pytask import Task @@ -196,3 +196,33 @@ def task_run_do_file(): assert result.exit_code == ExitCode.COLLECTION_FAILED assert "has multiple @pytask.mark.stata marks" in result.output + + +@needs_stata +@pytest.mark.end_to_end() +def test_with_task_without_path(runner, tmp_path): + task_source = """ + import pytask + from pytask import task + + task_example = pytask.mark.stata(script="script.do")( + pytask.mark.produces("auto.dta")(task()(lambda x: None)) + ) + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(task_source)) + + do_file = """ + sysuse auto, clear + save auto + """ + tmp_path.joinpath("script.do").write_text(textwrap.dedent(do_file)) + + result = runner.invoke(cli, [tmp_path.as_posix(), "--stata-keep-log"]) + + assert result.exit_code == ExitCode.OK + assert tmp_path.joinpath("auto.dta").exists() + + if sys.platform == "win32": + assert tmp_path.joinpath("lambda.log").exists() + else: + assert tmp_path.joinpath("lambda.log").exists() From d9ae49da1b88d03fb934a823c5a3c3b13dce2fc4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 30 Mar 2024 19:15:12 +0000 Subject: [PATCH 03/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pytask_stata/execute.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pytask_stata/execute.py b/src/pytask_stata/execute.py index abec892..49c6a71 100644 --- a/src/pytask_stata/execute.py +++ b/src/pytask_stata/execute.py @@ -1,13 +1,14 @@ """Execute tasks.""" from __future__ import annotations -from pathlib import Path import re +from pathlib import Path from pytask import has_mark from pytask import hookimpl from pytask import PTask -from pytask import Session, PTaskWithPath +from pytask import PTaskWithPath +from pytask import Session from pytask_stata.shared import convert_task_id_to_name_of_log_file from pytask_stata.shared import STATA_COMMANDS From 43d0764b5005a42eed091f87f8acbbbf1f470351 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 15 May 2024 00:53:23 +0200 Subject: [PATCH 04/10] Fix. --- pyproject.toml | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ba0323d..a819432 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] -requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.0"] -build-backend = "setuptools.build_meta" +requires = ["hatchling", "hatch_vcs"] +build-backend = "hatchling.build" [project] name = "pytask_stata" @@ -36,6 +36,23 @@ Changelog = "https://github.com/pytask-dev/pytask-stata/blob/main/CHANGES.md" [project.entry-points] pytask = { pytask_stata = "pytask_stata.plugin" } +[tool.rye] +managed = true + +[tool.hatch.build.targets.sdist] +exclude = ["tests"] +only-packages = true + +[tool.hatch.build.targets.wheel] +exclude = ["tests"] +only-packages = true + +[tool.hatch.version] +source = "vcs" + +[tool.hatch.metadata] +allow-direct-references = true + [tool.setuptools] include-package-data = true package-dir = { "" = "src" } From 2ac31475753e2bb7fe35262d7abba38857aa3dc5 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 19 May 2024 13:37:26 +0200 Subject: [PATCH 05/10] Fix. --- pyproject.toml | 5 ++++- src/pytask_stata/collect.py | 8 +++++--- src/pytask_stata/shared.py | 2 +- tests/conftest.py | 2 ++ tests/test_execute.py | 7 ++++--- tests/test_normal_execution_w_plugin.py | 3 ++- 6 files changed, 18 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a819432..5fd8eb8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ classifiers = [ "Programming Language :: Python :: 3 :: Only", ] requires-python = ">=3.8" -dependencies = ["click", "pytask>=0.3,<0.4"] +dependencies = ["click", "pytask>=0.4"] dynamic = ["version"] [project.readme] @@ -38,6 +38,9 @@ pytask = { pytask_stata = "pytask_stata.plugin" } [tool.rye] managed = true +dev-dependencies = [ + "tox-uv>=1.8.2", +] [tool.hatch.build.targets.sdist] exclude = ["tests"] diff --git a/src/pytask_stata/collect.py b/src/pytask_stata/collect.py index 78dc025..64d6b92 100644 --- a/src/pytask_stata/collect.py +++ b/src/pytask_stata/collect.py @@ -2,6 +2,7 @@ from __future__ import annotations +import functools import subprocess import warnings from pathlib import Path @@ -121,7 +122,7 @@ def pytask_collect_task( node_info=NodeInfo( arg_name="_cwd", path=(), - value=path.parent, + value=path.parent.as_posix(), task_path=path, task_name=name, ), @@ -140,13 +141,14 @@ def pytask_collect_task( dependencies["_cwd"] = cwd_node dependencies["_executable"] = executable_node + partialed = functools.partial(run_stata_script, _cwd=path.parent) markers = obj.pytask_meta.markers if hasattr(obj, "pytask_meta") else [] task: PTask if path is None: task = TaskWithoutPath( name=name, - function=run_stata_script, + function=partialed, depends_on=dependencies, produces=products, markers=markers, @@ -155,7 +157,7 @@ def pytask_collect_task( task = Task( base_name=name, path=path, - function=run_stata_script, + function=partialed, depends_on=dependencies, produces=products, markers=markers, diff --git a/src/pytask_stata/shared.py b/src/pytask_stata/shared.py index 2ef7f5a..96068c2 100644 --- a/src/pytask_stata/shared.py +++ b/src/pytask_stata/shared.py @@ -46,7 +46,7 @@ def stata( *, - script: str | Path | None = None, + script: str | Path, options: str | Iterable[str] | None = None, ) -> tuple[str | Path | None, str | Iterable[str] | None]: """Specify command line options for Stata. diff --git a/tests/conftest.py b/tests/conftest.py index ac8deff..18b9414 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,7 @@ import pytest from click.testing import CliRunner +from pytask import storage from pytask_stata.config import STATA_COMMANDS needs_stata = pytest.mark.skipif( @@ -72,6 +73,7 @@ def _restore_sys_path_and_module_after_test_execution(): class CustomCliRunner(CliRunner): def invoke(self, *args, **kwargs): """Restore sys.path and sys.modules after an invocation.""" + storage.create() with restore_sys_path_and_module_after_test_execution(): return super().invoke(*args, **kwargs) diff --git a/tests/test_execute.py b/tests/test_execute.py index b83d17a..e9844c8 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -105,10 +105,10 @@ def run_do_file(): @pytest.mark.end_to_end() def test_raise_error_if_stata_is_not_found(tmp_path, monkeypatch): task_source = """ - import pytask + from pytask import mark, task - @pytask.mark.stata(script="script.do") - @pytask.mark.produces("out.dta") + @task(kwargs={"produces": "out.dta"}) + @mark.stata(script="script.do") def task_run_do_file(): pass """ @@ -150,6 +150,7 @@ def task_run_do_file(): result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == ExitCode.OK + assert tmp_path.joinpath("out.dta").exists() @needs_stata diff --git a/tests/test_normal_execution_w_plugin.py b/tests/test_normal_execution_w_plugin.py index a7b211d..c2128de 100644 --- a/tests/test_normal_execution_w_plugin.py +++ b/tests/test_normal_execution_w_plugin.py @@ -5,6 +5,7 @@ import textwrap import pytest +from pytask import ExitCode from pytask import cli @@ -36,4 +37,4 @@ def task_example(depends_on, produces): tmp_path.joinpath(dependency).touch() result = runner.invoke(cli, [tmp_path.as_posix()]) - assert result.exit_code == 0 + assert result.exit_code == ExitCode.OK From 7b2c0af9fc96c5dc6e20f02bf6480cc9dbc2027d Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 19 May 2024 13:53:14 +0200 Subject: [PATCH 06/10] fix. --- src/pytask_stata/collect.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/pytask_stata/collect.py b/src/pytask_stata/collect.py index 64d6b92..f0cb14d 100644 --- a/src/pytask_stata/collect.py +++ b/src/pytask_stata/collect.py @@ -63,7 +63,6 @@ def pytask_collect_task( mark = _parse_stata_mark(mark=marks[0]) script, options = stata(**marks[0].kwargs) - obj.pytask_meta.markers.append(mark) # Collect the nodes in @pytask.mark.julia and validate them. @@ -190,7 +189,5 @@ def pytask_collect_task( def _parse_stata_mark(mark: Mark) -> Mark: """Parse a Stata mark.""" script, options = stata(**mark.kwargs) - parsed_kwargs = {"script": script or None, "options": options or []} - return Mark("stata", (), parsed_kwargs) From 1cbea153d4c3d4d79c2568491bdc0228b749281d Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 19 May 2024 14:03:54 +0200 Subject: [PATCH 07/10] Add version. --- pyproject.toml | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5fd8eb8..c43c39a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,9 @@ dev-dependencies = [ "tox-uv>=1.8.2", ] +[tool.hatch.build.hooks.vcs] +version-file = "src/pytask_stata/_version.py" + [tool.hatch.build.targets.sdist] exclude = ["tests"] only-packages = true @@ -56,20 +59,6 @@ source = "vcs" [tool.hatch.metadata] allow-direct-references = true -[tool.setuptools] -include-package-data = true -package-dir = { "" = "src" } -zip-safe = false -platforms = ["any"] -license-files = ["LICENSE"] - -[tool.setuptools.packages.find] -where = ["src"] -namespaces = false - -[tool.setuptools_scm] -write_to = "src/pytask_stata/_version.py" - [tool.mypy] files = ["src", "tests"] check_untyped_defs = true From 79789f006841f04bebead3f70d640dfa6324163c Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 19 May 2024 14:45:55 +0200 Subject: [PATCH 08/10] Change log path. --- src/pytask_stata/collect.py | 8 ++++---- src/pytask_stata/execute.py | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/pytask_stata/collect.py b/src/pytask_stata/collect.py index f0cb14d..7fd847a 100644 --- a/src/pytask_stata/collect.py +++ b/src/pytask_stata/collect.py @@ -31,11 +31,11 @@ def run_stata_script( _executable: str, _script: Path, _options: list[str], - _log_name: list[str], + _log_name: str, _cwd: Path, ) -> None: """Run an R script.""" - cmd = [_executable, "-e", "do", _script.as_posix(), *_options, *_log_name] + cmd = [_executable, "-e", "do", _script.as_posix(), *_options, _log_name] print("Executing " + " ".join(cmd) + ".") # noqa: T201 subprocess.run(cmd, cwd=_cwd, check=True) # noqa: S603 @@ -165,9 +165,9 @@ def pytask_collect_task( # Add log_name node that depends on the task id. if session.config["platform"] == "win32": log_name = convert_task_id_to_name_of_log_file(task) - log_name_arg = [f"-{log_name}"] + log_name_arg = f"-{log_name}" else: - log_name_arg = [] + log_name_arg = "" log_name_node = session.hook.pytask_collect_node( session=session, diff --git a/src/pytask_stata/execute.py b/src/pytask_stata/execute.py index a52c327..0c1addf 100644 --- a/src/pytask_stata/execute.py +++ b/src/pytask_stata/execute.py @@ -12,7 +12,6 @@ from pytask import hookimpl from pytask_stata.shared import STATA_COMMANDS -from pytask_stata.shared import convert_task_id_to_name_of_log_file @hookimpl @@ -41,7 +40,7 @@ def pytask_execute_task_teardown(session: Session, task: PTask) -> None: """ if has_mark(task, "stata"): if session.config["platform"] == "win32": - log_name = convert_task_id_to_name_of_log_file(task) + log_name = task.depends_on["_log_name"] if isinstance(task, PTaskWithPath): path_to_log = task.path.with_name(log_name).with_suffix(".log") else: From cf00d6892eb0e6f11a4affc3fc58507e6b05cc67 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 19 May 2024 15:13:27 +0200 Subject: [PATCH 09/10] Fix. --- src/pytask_stata/collect.py | 7 +++---- src/pytask_stata/execute.py | 2 +- tests/test_execute.py | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/pytask_stata/collect.py b/src/pytask_stata/collect.py index 7fd847a..a37a5c4 100644 --- a/src/pytask_stata/collect.py +++ b/src/pytask_stata/collect.py @@ -35,7 +35,7 @@ def run_stata_script( _cwd: Path, ) -> None: """Run an R script.""" - cmd = [_executable, "-e", "do", _script.as_posix(), *_options, _log_name] + cmd = [_executable, "-e", "do", _script.as_posix(), *_options, f"-{_log_name}"] print("Executing " + " ".join(cmd) + ".") # noqa: T201 subprocess.run(cmd, cwd=_cwd, check=True) # noqa: S603 @@ -165,9 +165,8 @@ def pytask_collect_task( # Add log_name node that depends on the task id. if session.config["platform"] == "win32": log_name = convert_task_id_to_name_of_log_file(task) - log_name_arg = f"-{log_name}" else: - log_name_arg = "" + log_name = "" log_name_node = session.hook.pytask_collect_node( session=session, @@ -175,7 +174,7 @@ def pytask_collect_task( node_info=NodeInfo( arg_name="_log_name", path=(), - value=PythonNode(value=log_name_arg), + value=PythonNode(value=log_name), task_path=path, task_name=name, ), diff --git a/src/pytask_stata/execute.py b/src/pytask_stata/execute.py index 0c1addf..34d0398 100644 --- a/src/pytask_stata/execute.py +++ b/src/pytask_stata/execute.py @@ -40,7 +40,7 @@ def pytask_execute_task_teardown(session: Session, task: PTask) -> None: """ if has_mark(task, "stata"): if session.config["platform"] == "win32": - log_name = task.depends_on["_log_name"] + log_name = task.depends_on["_log_name"].load() if isinstance(task, PTaskWithPath): path_to_log = task.path.with_name(log_name).with_suffix(".log") else: diff --git a/tests/test_execute.py b/tests/test_execute.py index e9844c8..6aa5103 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -225,6 +225,6 @@ def test_with_task_without_path(runner, tmp_path): assert tmp_path.joinpath("auto.dta").exists() if sys.platform == "win32": - assert tmp_path.joinpath("lambda.log").exists() + assert tmp_path.joinpath("task_example_py_lambda.log").exists() else: - assert tmp_path.joinpath("lambda.log").exists() + assert not tmp_path.joinpath("task_example_py_lambda.log").exists() From 99b4f3461cda5eb93529f221fa68ccf14680908a Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 19 May 2024 15:18:11 +0200 Subject: [PATCH 10/10] Fix more. --- CHANGES.md | 2 +- MANIFEST.in | 12 ------------ environment.yml | 25 ------------------------- 3 files changed, 1 insertion(+), 38 deletions(-) delete mode 100644 MANIFEST.in delete mode 100644 environment.yml diff --git a/CHANGES.md b/CHANGES.md index 4ea6f48..b43a8c5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,7 +7,7 @@ releases are available on [PyPI](https://pypi.org/project/pytask-stata) and ## 0.4.0 - 2023-10-08 -- {pull}`31` makes pytask-stata compatible with pytask v0.4.0. +- {pull}`36` makes pytask-stata compatible with pytask v0.4.0. ## 0.3.0 - 2023-01-23 diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 88437a4..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,12 +0,0 @@ -prune tests - -exclude .coveragerc -exclude *.md -exclude *.yml -exclude *.yaml -exclude tox.ini - -include README.md -include LICENSE - -recursive-include src py.typed diff --git a/environment.yml b/environment.yml deleted file mode 100644 index c048d09..0000000 --- a/environment.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: pytask-stata - -channels: - - conda-forge - - nodefaults - -dependencies: - - python >=3.8 - - pip - - setuptools_scm - - toml - - # Package dependencies - - pytask >=0.4.0 - - pytask-parallel >=0.4.0 - - # Misc - - black - - pre-commit - - pytest-cov - - pytest-xdist - - tox-conda - - - pip: - - -e .