diff --git a/docs/usage.rst b/docs/usage.rst index e9c505822..31778cc4b 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -39,7 +39,18 @@ passed as optional argument. Meson subprojects are automatically ignored if ``meson.build`` exists in the project root. ``--include-meson-subprojects`` overrides this behaviour. -Symbolic links and files that are zero-sized are automatically ignored. +Files that are zero-sized are automatically ignored. + +Symbolic links are handled differently depending on the target of the link: + +#. a symlink pointing to a covered file is considered to be the same file as + the covered file and is therefore ignored. +#. a symlink pointing to a file that is not a covered file is itself considered + to be a covered file and is not skipped, unless the symlink is ignored by + other means. + +A "covered file" is the term used in the REUSE Specification to name a file +that needs copyright and licensing information. annotate ======== diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py index d31b59dcb..b6a595c03 100644 --- a/src/reuse/__init__.py +++ b/src/reuse/__init__.py @@ -1,5 +1,6 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: 2021 Alliander N.V. 
+# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -26,6 +27,8 @@ from boolean.boolean import Expression +import reuse.compat + try: __version__ = version("reuse") except PackageNotFoundError: diff --git a/src/reuse/_util.py b/src/reuse/_util.py index 4b29ff0ae..3d4d83b16 100644 --- a/src/reuse/_util.py +++ b/src/reuse/_util.py @@ -7,6 +7,7 @@ # SPDX-FileCopyrightText: 2022 Pietro Albini # SPDX-FileCopyrightText: 2023 DB Systel GmbH # SPDX-FileCopyrightText: 2023 Johannes Zarl-Zierl +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -476,6 +477,10 @@ def _check_read(self, path: Path) -> None: _("'{}' is not a directory").format(path) ) return + if not path.exists() and path.is_symlink(): + # If the path is a broken symlink we can continue, allowing usage of + # --force-dot-license even if the link target is not readable. + return raise ArgumentTypeError(_("can't open '{}'").format(path)) def _check_write(self, path: Path) -> None: diff --git a/src/reuse/compat.py b/src/reuse/compat.py new file mode 100644 index 000000000..45b4294bc --- /dev/null +++ b/src/reuse/compat.py @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: 2023 Matthias Riße +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""This module adds compatibility code like backports.""" +import os +import sys +from pathlib import Path + +# Introduce an implementation of pathlib.Path's is_relative_to in python +# versions before 3.9 +if sys.version_info < (3, 9): + + def _is_relative_to(self: Path, path: Path) -> bool: + try: + self.relative_to(path) + return True + except ValueError: + return False + + setattr(Path, "is_relative_to", _is_relative_to) + +# Introduce an implementation of pathlib.Path's readlink in python versions +# before 3.9 +if sys.version_info < (3, 9): + + def _readlink(self: Path) -> Path: + return Path(os.readlink(self)) + + setattr(Path, "readlink", _readlink) diff --git a/src/reuse/header.py 
b/src/reuse/header.py index 8725c94f6..07382c802 100644 --- a/src/reuse/header.py +++ b/src/reuse/header.py @@ -9,6 +9,7 @@ # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2022 Yaman Qalieh # SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -392,7 +393,7 @@ def _is_uncommentable(path: Path) -> bool: registered as an UncommentableCommentStyle. """ is_uncommentable = _get_comment_style(path) == UncommentableCommentStyle - return is_uncommentable or is_binary(str(path)) + return is_uncommentable or path.is_symlink() or is_binary(str(path)) def _verify_paths_line_handling( diff --git a/src/reuse/project.py b/src/reuse/project.py index 9f192f9e7..d0089639f 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2023 DB Systel GmbH +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -136,8 +137,27 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]: _LOGGER.debug("ignoring '%s'", the_file) continue if the_file.is_symlink(): - _LOGGER.debug("skipping symlink '%s'", the_file) - continue + # Needs to use os.path.abspath instead of Path.absolute + # since the former normalizes the path, i.e. resolves "..". + # There is no method in pathlib for this which doesn't also + # resolve symlinks recursively, like Path.resolve. 
+ target_file = Path( + os.path.abspath(the_file.readlink()) # type: ignore + ) + _LOGGER.debug( + "'%s' is a symlink pointing to '%s'", + the_file, + target_file, + ) + if ( + target_file.is_relative_to( # type: ignore # pylint: disable=E1101 + self.root.resolve() + ) + and (target_file.exists() or target_file.is_symlink()) + and not self._is_path_ignored(target_file) + ): + _LOGGER.debug("skipping symlink '%s'", the_file) + continue # Suppressing this error because I simply don't want to deal # with that here. with contextlib.suppress(OSError): @@ -184,35 +204,39 @@ def reuse_info_of(self, path: StrPath) -> ReuseInfo: dep5_path = source_path # Search the file for REUSE information. - with path.open("rb") as fp: - try: - # Completely read the file once to search for possible snippets - if _contains_snippet(fp): - _LOGGER.debug(f"'{path}' seems to contain a SPDX Snippet") - read_limit = None - else: - read_limit = _HEADER_BYTES - # Reset read position - fp.seek(0) - # Scan the file for REUSE info, possible limiting the read - # length - file_result = extract_reuse_info( - decoded_text_from_binary(fp, size=read_limit) - ) - if file_result: - source_path = str(path) - if path.suffix == ".license": - source_type = SourceType.DOT_LICENSE_FILE + if not path.is_symlink(): + with path.open("rb") as fp: + try: + # Completely read the file once to search for possible + # snippets + if _contains_snippet(fp): + _LOGGER.debug( + f"'{path}' seems to contain a SPDX Snippet" + ) + read_limit = None else: - source_type = SourceType.FILE_HEADER - - except (ExpressionError, ParseError): - _LOGGER.error( - _( - "'{path}' holds an SPDX expression that cannot be" - " parsed, skipping the file" - ).format(path=path) - ) + read_limit = _HEADER_BYTES + # Reset read position + fp.seek(0) + # Scan the file for REUSE info, possibly limiting the read + # length + file_result = extract_reuse_info( + decoded_text_from_binary(fp, size=read_limit) + ) + if file_result: + source_path = str(path) + 
if path.suffix == ".license": + source_type = SourceType.DOT_LICENSE_FILE + else: + source_type = SourceType.FILE_HEADER + + except (ExpressionError, ParseError): + _LOGGER.error( + _( + "'{path}' holds an SPDX expression that cannot be" + " parsed, skipping the file" + ).format(path=path) + ) # There is both information in a .dep5 file and in the file header if ( diff --git a/src/reuse/report.py b/src/reuse/report.py index 268e48430..297a8e0a8 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2022 Pietro Albini +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -445,14 +446,14 @@ def generate( ) -> "FileReport": """Generate a FileReport from a path in a Project.""" path = Path(path) - if not path.is_file(): - raise OSError(f"{path} is not a file") + if not path.is_file() and not path.is_symlink(): + raise OSError(f"{path} is not supported") relative = project.relative_from_root(path) report = cls("./" + str(relative), path, do_checksum=do_checksum) # Checksum and ID - if report.do_checksum: + if report.do_checksum and not path.is_symlink(): report.spdxfile.chk_sum = _checksum(path) else: # This path avoids a lot of heavy computation, which is handy for diff --git a/src/reuse/vcs.py b/src/reuse/vcs.py index 3ae70b117..a22e9de6d 100644 --- a/src/reuse/vcs.py +++ b/src/reuse/vcs.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: © 2020 Liferay, Inc. 
# SPDX-FileCopyrightText: 2020 John Mulligan +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -99,11 +100,15 @@ def _find_all_ignored_files(self) -> Set[Path]: ] result = execute_command(command, _LOGGER, cwd=self.project.root) all_files = result.stdout.decode("utf-8").split("\0") - return {Path(file_) for file_ in all_files} + return {Path(file_) for file_ in all_files[:-1]}.union({Path(".git")}) def is_ignored(self, path: StrPath) -> bool: path = self.project.relative_from_root(path) - return path in self._all_ignored_files + return path in self._all_ignored_files or any( + path.is_relative_to(ignored_dir) # type: ignore + for ignored_dir in self._all_ignored_files + if ignored_dir.is_dir() + ) @classmethod def in_repo(cls, directory: StrPath) -> bool: @@ -163,11 +168,15 @@ def _find_all_ignored_files(self) -> Set[Path]: ] result = execute_command(command, _LOGGER, cwd=self.project.root) all_files = result.stdout.decode("utf-8").split("\0") - return {Path(file_) for file_ in all_files} + return {Path(file_) for file_ in all_files[:-1]}.union({Path(".hg")}) def is_ignored(self, path: StrPath) -> bool: path = self.project.relative_from_root(path) - return path in self._all_ignored_files + return path in self._all_ignored_files or any( + path.is_relative_to(ignored_dir) # type: ignore + for ignored_dir in self._all_ignored_files + if ignored_dir.is_dir() + ) @classmethod def in_repo(cls, directory: StrPath) -> bool: diff --git a/tests/conftest.py b/tests/conftest.py index 601f5ecc4..25260db57 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. 
# SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -137,6 +138,15 @@ def fake_repository(tmpdir_factory) -> Path: encoding="utf-8", ) + (directory / "symlink-to-covered").symlink_to(directory / "doc/index.rst") + (directory / "symlink-to-not-covered").symlink_to(directory) + (directory / "symlink-to-not-covered.license").write_text( + "# SPDX-FileCopyrightText: 2017 Jane Doe\n" + "#\n" + "# SPDX-License-Identifier: GPL-3.0-or-later", + encoding="utf-8", + ) + os.chdir(directory) return directory diff --git a/tests/test_main_annotate.py b/tests/test_main_annotate.py index d054fa086..511a821f5 100644 --- a/tests/test_main_annotate.py +++ b/tests/test_main_annotate.py @@ -3,18 +3,27 @@ # SPDX-FileCopyrightText: © 2020 Liferay, Inc. # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later """Tests for reuse._main: annotate""" import logging import stat +from importlib import import_module from inspect import cleandoc import pytest from reuse._main import main +try: + IS_POSIX = bool(import_module("posix")) +except ImportError: + IS_POSIX = False + +posix = pytest.mark.skipif(not IS_POSIX, reason="Windows not supported") + # pylint: disable=too-many-lines,unused-argument @@ -971,6 +980,55 @@ def test_annotate_force_dot_license_doesnt_write_to_file( assert simple_file.read_text() == "Preserve this" +@posix +@pytest.mark.parametrize( + "create_target", + [True, False], + ids=map(lambda x: f"create_target={x}", [True, False]), +) +def test_annotate_force_dot_license_for_symlinks( + fake_repository, stringio, mock_date_today, create_target +): + """Annotating a symlink, broken or not, with --force-dot-license should + result in a .license file next to the symlink. 
+ """ + target_file = fake_repository / "target-file" + if create_target: + target_file.write_text("Preserve this") + symlink = fake_repository / "symlink" + symlink.symlink_to(target_file.relative_to(fake_repository)) + expected = cleandoc( + """ + SPDX-FileCopyrightText: 2018 Jane Doe + + SPDX-License-Identifier: GPL-3.0-or-later + """ + ) + + result = main( + [ + "annotate", + "--license", + "GPL-3.0-or-later", + "--copyright", + "Jane Doe", + "--force-dot-license", + "symlink", + ], + out=stringio, + ) + + assert result == 0 + assert ( + symlink.with_name(f"{symlink.name}.license").read_text().strip() + == expected + ) + if create_target: + assert target_file.read_text() == "Preserve this" + else: + assert not symlink.exists() + + def test_annotate_to_read_only_file_does_not_traceback( fake_repository, stringio, mock_date_today ): diff --git a/tests/test_project.py b/tests/test_project.py index f0be53459..a719d0d46 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -1,11 +1,13 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: © 2020 Liferay, Inc. 
# SPDX-FileCopyrightText: 2022 Florian Snow +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later """Tests for reuse.project.""" +import itertools import os import shutil import warnings @@ -99,8 +101,8 @@ def test_all_files_ignore_hg(empty_directory): @posix -def test_all_files_symlinks(empty_directory): - """All symlinks must be ignored.""" +def test_all_files_ignore_symlinks_to_covered_files(empty_directory): + """All symlinks to covered files must be ignored.""" (empty_directory / "blob").write_text("foo") (empty_directory / "blob.license").write_text( cleandoc( @@ -111,9 +113,75 @@ def test_all_files_symlinks(empty_directory): """ ) ) - (empty_directory / "symlink").symlink_to("blob") + (empty_directory / "symlink0").symlink_to("blob") + for i in range(5): + (empty_directory / f"symlink{i + 1}").symlink_to(f"symlink{i}") project = Project(empty_directory) - assert Path("symlink").absolute() not in project.all_files() + for i in range(6): + assert Path(f"symlink{i}").absolute() not in project.all_files() + + +no_vcs_params = list( + filter( + lambda x: not (x[0] == "non_existent_file" and x[1] is True), + itertools.product( + [ + "../outside_file", + "non_existent_file", + ], + [False, True], + ), + ) +) + + +@posix +@pytest.mark.parametrize( + "target,create_target", + no_vcs_params, + ids=map(lambda x: f"target={x[0]},create_target={x[1]}", no_vcs_params), +) +def test_all_files_cover_symlinks_to_uncovered_files( + empty_directory, target, create_target +): + """All symlinks to files not covered must be included.""" + project_dir = empty_directory / "project_dir" + project_dir.mkdir() + (project_dir / "symlink").symlink_to(target) + if create_target: + (project_dir / target).parent.mkdir(parents=True, exist_ok=True) + (project_dir / target).write_text("some content") + project = Project(project_dir) + assert (project_dir / "symlink").absolute() in project.all_files() + + +@posix +@pytest.mark.parametrize( + 
"target,create_target", + no_vcs_params, + ids=map(lambda x: f"target={x[0]},create_target={x[1]}", no_vcs_params), +) +def test_all_files_ignore_symlinks_to_covered_symlinks( + empty_directory, target, create_target +): + """All symlinks to symlinks that are considered to be covered files must be + ignored. + """ + project_dir = empty_directory / "project_dir" + project_dir.mkdir() + (project_dir / "symlink0").symlink_to(target) + for i in range(5): + (project_dir / f"symlink{i + 1}").symlink_to( + project_dir / f"symlink{i}" + ) + if create_target: + (project_dir / target).parent.mkdir(parents=True, exist_ok=True) + (project_dir / target).write_text("some content") + project = Project(project_dir) + for i in range(1, 6): + assert ( + project_dir / f"symlink{i}" + ).absolute() not in project.all_files() def test_all_files_ignore_zero_sized(empty_directory): @@ -158,6 +226,81 @@ def test_all_files_git_ignored_contains_newline(git_repository): assert Path("hello\nworld.pyc").absolute() not in project.all_files() +@posix +def test_all_files_git_ignore_symlinks_to_covered_files(git_repository): + """All symlinks to covered files must be ignored.""" + (git_repository / "symlink0").symlink_to("doc/index.rst") + for i in range(5): + (git_repository / f"symlink{i + 1}").symlink_to(f"symlink{i}") + project = Project(git_repository) + for i in range(6): + assert Path(f"symlink{i}").absolute() not in project.all_files() + + +git_params = list( + filter( + lambda x: not (x[0] == "non_existent_file" and x[1] is True), + itertools.product( + [ + ".git/file_in_dotgit", + ".git/annex/objects/file_in_annex", + "../outside_file", + "build/somefile.py", + "non_existent_file", + ], + [False, True], + ), + ) +) + + +@posix +@pytest.mark.parametrize( + "target,create_target", + git_params, + ids=map(lambda x: f"target={x[0]},create_target={x[1]}", git_params), +) +def test_all_files_git_cover_symlinks_to_uncovered_files( + empty_directory, git_repository, target, create_target +): + 
"""All symlinks to files not covered must be included.""" + git_repository_target_path = empty_directory / "repository" + shutil.move(git_repository, git_repository_target_path) + git_repository = git_repository_target_path + if create_target: + (git_repository / target).parent.mkdir(parents=True, exist_ok=True) + (git_repository / target).write_text("some content") + (git_repository / "symlink").symlink_to(target) + project = Project(git_repository) + assert Path("symlink").absolute() in project.all_files() + + +@posix +@pytest.mark.parametrize( + "target,create_target", + git_params, + ids=map(lambda x: f"target={x[0]},create_target={x[1]}", git_params), +) +def test_all_files_git_ignore_symlinks_to_covered_symlinks( + empty_directory, git_repository, target, create_target +): + """All symlinks to symlinks that are considered to be covered files must be + ignored. + """ + git_repository_target_path = empty_directory / "repository" + shutil.move(git_repository, git_repository_target_path) + git_repository = git_repository_target_path + if create_target: + (git_repository / target).parent.mkdir(parents=True, exist_ok=True) + (git_repository / target).write_text("some content") + (git_repository / "symlink0").symlink_to(target) + for i in range(5): + (git_repository / f"symlink{i + 1}").symlink_to(f"symlink{i}") + project = Project(git_repository) + for i in range(1, 6): + assert Path(f"symlink{i}").absolute() not in project.all_files() + + def test_all_files_submodule_is_ignored(submodule_repository): """If a submodule is ignored, all_files should not raise an Exception.""" (submodule_repository / "submodule/foo.py").write_text("foo") @@ -203,6 +346,80 @@ def test_all_files_hg_ignored_contains_newline(hg_repository): assert Path("hello\nworld.pyc").absolute() not in project.all_files() +@posix +def test_all_files_hg_ignore_symlinks_to_covered_files(hg_repository): + """All symlinks to covered files must be ignored.""" + (hg_repository / 
"symlink0").symlink_to("doc/index.rst") + for i in range(5): + (hg_repository / f"symlink{i + 1}").symlink_to(f"symlink{i}") + project = Project(hg_repository) + for i in range(6): + assert Path(f"symlink{i}").absolute() not in project.all_files() + + +hg_params = list( + filter( + lambda x: not (x[0] == "non_existent_file" and x[1] is True), + itertools.product( + [ + ".hg/file_in_dothg", + "../outside_file", + "build/somefile.py", + "non_existent_file", + ], + [False, True], + ), + ) +) + + +@posix +@pytest.mark.parametrize( + "target,create_target", + hg_params, + ids=map(lambda x: f"target={x[0]},create_target={x[1]}", hg_params), +) +def test_all_files_hg_cover_symlinks_to_uncovered_files( + empty_directory, hg_repository, target, create_target +): + """All symlinks to files not covered must be included.""" + hg_repository_target_path = empty_directory / "repository" + shutil.move(hg_repository, hg_repository_target_path) + hg_repository = hg_repository_target_path + if create_target: + (hg_repository / target).parent.mkdir(parents=True, exist_ok=True) + (hg_repository / target).write_text("some content") + (hg_repository / "symlink").symlink_to(target) + project = Project(hg_repository) + assert Path("symlink").absolute() in project.all_files() + + +@posix +@pytest.mark.parametrize( + "target,create_target", + hg_params, + ids=map(lambda x: f"target={x[0]},create_target={x[1]}", hg_params), +) +def test_all_files_hg_ignore_symlinks_to_covered_symlinks( + empty_directory, hg_repository, target, create_target +): + """All symlinks to symlinks that are considered to be covered files must be + ignored. 
+ """ + hg_repository_target_path = empty_directory / "repository" + shutil.move(hg_repository, hg_repository_target_path) + hg_repository = hg_repository_target_path + if create_target: + (hg_repository / target).parent.mkdir(parents=True, exist_ok=True) + (hg_repository / target).write_text("some content") + (hg_repository / "symlink0").symlink_to(target) + for i in range(5): + (hg_repository / f"symlink{i + 1}").symlink_to(f"symlink{i}") + project = Project(hg_repository) + for i in range(1, 6): + assert Path(f"symlink{i}").absolute() not in project.all_files() + + def test_reuse_info_of_file_does_not_exist(fake_repository): """Raise FileNotFoundError when asking for the REUSE info of a file that does not exist.