From b7f8e8b933c66a2c615e5fab835c22fd06edeefe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ri=C3=9Fe?= Date: Thu, 23 Feb 2023 14:02:30 +0100 Subject: [PATCH 1/8] Backport Path.is_relative_to to python3.8 and before --- src/reuse/__init__.py | 3 +++ src/reuse/compat.py | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 src/reuse/compat.py diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py index d31b59dcb..b6a595c03 100644 --- a/src/reuse/__init__.py +++ b/src/reuse/__init__.py @@ -1,5 +1,6 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: 2021 Alliander N.V. +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -26,6 +27,8 @@ from boolean.boolean import Expression +import reuse.compat + try: __version__ = version("reuse") except PackageNotFoundError: diff --git a/src/reuse/compat.py b/src/reuse/compat.py new file mode 100644 index 000000000..9e034a0fe --- /dev/null +++ b/src/reuse/compat.py @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: 2023 Matthias Riße +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""This module adds compatibility code like backports.""" +import sys + +# Introduce an implementation of pathlib.Path's is_relative_to in python +# versions before 3.9 +if sys.version_info < (3, 9): + from pathlib import Path + + def _is_relative_to(self: Path, path: Path) -> bool: + try: + self.relative_to(path) + return True + except ValueError: + return False + + setattr(Path, "is_relative_to", _is_relative_to) From 70638eddb356186e7e43d2420bcf47708063064b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ri=C3=9Fe?= Date: Tue, 20 Jun 2023 17:25:12 +0200 Subject: [PATCH 2/8] Backport Path.readlink to python3.8 and before --- src/reuse/compat.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/reuse/compat.py b/src/reuse/compat.py index 9e034a0fe..45b4294bc 100644 --- a/src/reuse/compat.py +++ 
b/src/reuse/compat.py @@ -3,12 +3,13 @@ # SPDX-License-Identifier: GPL-3.0-or-later """This module adds compatibility code like backports.""" +import os import sys +from pathlib import Path # Introduce an implementation of pathlib.Path's is_relative_to in python # versions before 3.9 if sys.version_info < (3, 9): - from pathlib import Path def _is_relative_to(self: Path, path: Path) -> bool: try: @@ -18,3 +19,12 @@ def _is_relative_to(self: Path, path: Path) -> bool: return False setattr(Path, "is_relative_to", _is_relative_to) + +# Introduce an implementation of pathlib.Path's readlink in python versions +# before 3.9 +if sys.version_info < (3, 9): + + def _readlink(self: Path) -> Path: + return Path(os.readlink(self)) + + setattr(Path, "readlink", _readlink) From 6ed46dbead23ed708a6f2abde40b1ffe31b1f837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ri=C3=9Fe?= Date: Fri, 16 Jun 2023 17:15:03 +0200 Subject: [PATCH 3/8] Fix ignored path detection in ignored directories Previously a sub-path of an ignored directory was not considered to be ignored, e.g. is_ignored returned False for a file build/hello.py even if _all_ignored_files contained Path("build"). Instead of just checking if the path is in the ignored files we also have to check if it is inside of an ignored directory. Additionally this commit fixes a bug that surfaced: when building the _all_ignored_files set all_files contains an empty string as its last element. This leads to Path(".") being an element in _all_ignored_files, i.e. ignore everything. Using all but the last element from all_files fixes that. --- src/reuse/vcs.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/reuse/vcs.py b/src/reuse/vcs.py index 3ae70b117..ec760abea 100644 --- a/src/reuse/vcs.py +++ b/src/reuse/vcs.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: © 2020 Liferay, Inc. 
# SPDX-FileCopyrightText: 2020 John Mulligan +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -99,11 +100,15 @@ def _find_all_ignored_files(self) -> Set[Path]: ] result = execute_command(command, _LOGGER, cwd=self.project.root) all_files = result.stdout.decode("utf-8").split("\0") - return {Path(file_) for file_ in all_files} + return {Path(file_) for file_ in all_files[:-1]} def is_ignored(self, path: StrPath) -> bool: path = self.project.relative_from_root(path) - return path in self._all_ignored_files + return path in self._all_ignored_files or any( + path.is_relative_to(ignored_dir) # type: ignore + for ignored_dir in self._all_ignored_files + if ignored_dir.is_dir() + ) @classmethod def in_repo(cls, directory: StrPath) -> bool: @@ -163,11 +168,15 @@ def _find_all_ignored_files(self) -> Set[Path]: ] result = execute_command(command, _LOGGER, cwd=self.project.root) all_files = result.stdout.decode("utf-8").split("\0") - return {Path(file_) for file_ in all_files} + return {Path(file_) for file_ in all_files[:-1]} def is_ignored(self, path: StrPath) -> bool: path = self.project.relative_from_root(path) - return path in self._all_ignored_files + return path in self._all_ignored_files or any( + path.is_relative_to(ignored_dir) # type: ignore + for ignored_dir in self._all_ignored_files + if ignored_dir.is_dir() + ) @classmethod def in_repo(cls, directory: StrPath) -> bool: From 2bb117c0c1d581000e69cc182cdd270b70af85e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ri=C3=9Fe?= Date: Thu, 23 Feb 2023 11:47:08 +0100 Subject: [PATCH 4/8] Add tests for better handling of symlinks These tests assume the interpretation of the REUSE specification that: 1. a symlink pointing to a covered file is considered to be the same file as the covered file and can therefore be ignored. 2. 
a symlink pointing to a file that is not a covered file is itself considered to be a covered file and should not be ignored, unless the symlink itself is ignored by other means. --- tests/conftest.py | 10 ++ tests/test_project.py | 225 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 231 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 601f5ecc4..25260db57 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -137,6 +138,15 @@ def fake_repository(tmpdir_factory) -> Path: encoding="utf-8", ) + (directory / "symlink-to-covered").symlink_to(directory / "doc/index.rst") + (directory / "symlink-to-not-covered").symlink_to(directory) + (directory / "symlink-to-not-covered.license").write_text( + "# SPDX-FileCopyrightText: 2017 Jane Doe\n" + "#\n" + "# SPDX-License-Identifier: GPL-3.0-or-later", + encoding="utf-8", + ) + os.chdir(directory) return directory diff --git a/tests/test_project.py b/tests/test_project.py index f0be53459..a719d0d46 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -1,11 +1,13 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: © 2020 Liferay, Inc. 
# SPDX-FileCopyrightText: 2022 Florian Snow +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later """Tests for reuse.project.""" +import itertools import os import shutil import warnings @@ -99,8 +101,8 @@ def test_all_files_ignore_hg(empty_directory): @posix -def test_all_files_symlinks(empty_directory): - """All symlinks must be ignored.""" +def test_all_files_ignore_symlinks_to_covered_files(empty_directory): + """All symlinks to covered files must be ignored.""" (empty_directory / "blob").write_text("foo") (empty_directory / "blob.license").write_text( cleandoc( @@ -111,9 +113,75 @@ def test_all_files_symlinks(empty_directory): """ ) ) - (empty_directory / "symlink").symlink_to("blob") + (empty_directory / "symlink0").symlink_to("blob") + for i in range(5): + (empty_directory / f"symlink{i + 1}").symlink_to(f"symlink{i}") project = Project(empty_directory) - assert Path("symlink").absolute() not in project.all_files() + for i in range(6): + assert Path(f"symlink{i}").absolute() not in project.all_files() + + +no_vcs_params = list( + filter( + lambda x: not (x[0] == "non_existent_file" and x[1] is True), + itertools.product( + [ + "../outside_file", + "non_existent_file", + ], + [False, True], + ), + ) +) + + +@posix +@pytest.mark.parametrize( + "target,create_target", + no_vcs_params, + ids=map(lambda x: f"target={x[0]},create_target={x[1]}", no_vcs_params), +) +def test_all_files_cover_symlinks_to_uncovered_files( + empty_directory, target, create_target +): + """All symlinks to files not covered must be included.""" + project_dir = empty_directory / "project_dir" + project_dir.mkdir() + (project_dir / "symlink").symlink_to(target) + if create_target: + (project_dir / target).parent.mkdir(parents=True, exist_ok=True) + (project_dir / target).write_text("some content") + project = Project(project_dir) + assert (project_dir / "symlink").absolute() in project.all_files() + + +@posix +@pytest.mark.parametrize( + 
"target,create_target", + no_vcs_params, + ids=map(lambda x: f"target={x[0]},create_target={x[1]}", no_vcs_params), +) +def test_all_files_ignore_symlinks_to_covered_symlinks( + empty_directory, target, create_target +): + """All symlinks to symlinks that are considered to be covered files must be + ignored. + """ + project_dir = empty_directory / "project_dir" + project_dir.mkdir() + (project_dir / "symlink0").symlink_to(target) + for i in range(5): + (project_dir / f"symlink{i + 1}").symlink_to( + project_dir / f"symlink{i}" + ) + if create_target: + (project_dir / target).parent.mkdir(parents=True, exist_ok=True) + (project_dir / target).write_text("some content") + project = Project(project_dir) + for i in range(1, 6): + assert ( + project_dir / f"symlink{i}" + ).absolute() not in project.all_files() def test_all_files_ignore_zero_sized(empty_directory): @@ -158,6 +226,81 @@ def test_all_files_git_ignored_contains_newline(git_repository): assert Path("hello\nworld.pyc").absolute() not in project.all_files() +@posix +def test_all_files_git_ignore_symlinks_to_covered_files(git_repository): + """All symlinks to covered files must be ignored.""" + (git_repository / "symlink0").symlink_to("doc/index.rst") + for i in range(5): + (git_repository / f"symlink{i + 1}").symlink_to(f"symlink{i}") + project = Project(git_repository) + for i in range(6): + assert Path(f"symlink{i}").absolute() not in project.all_files() + + +git_params = list( + filter( + lambda x: not (x[0] == "non_existent_file" and x[1] is True), + itertools.product( + [ + ".git/file_in_dotgit", + ".git/annex/objects/file_in_annex", + "../outside_file", + "build/somefile.py", + "non_existent_file", + ], + [False, True], + ), + ) +) + + +@posix +@pytest.mark.parametrize( + "target,create_target", + git_params, + ids=map(lambda x: f"target={x[0]},create_target={x[1]}", git_params), +) +def test_all_files_git_cover_symlinks_to_uncovered_files( + empty_directory, git_repository, target, create_target +): + 
"""All symlinks to files not covered must be included.""" + git_repository_target_path = empty_directory / "repository" + shutil.move(git_repository, git_repository_target_path) + git_repository = git_repository_target_path + if create_target: + (git_repository / target).parent.mkdir(parents=True, exist_ok=True) + (git_repository / target).write_text("some content") + (git_repository / "symlink").symlink_to(target) + project = Project(git_repository) + assert Path("symlink").absolute() in project.all_files() + + +@posix +@pytest.mark.parametrize( + "target,create_target", + git_params, + ids=map(lambda x: f"target={x[0]},create_target={x[1]}", git_params), +) +def test_all_files_git_ignore_symlinks_to_covered_symlinks( + empty_directory, git_repository, target, create_target +): + """All symlinks to symlinks that are considered to be covered files must be + ignored. + """ + git_repository_target_path = empty_directory / "repository" + shutil.move(git_repository, git_repository_target_path) + git_repository = git_repository_target_path + if create_target: + (git_repository / target).parent.mkdir(parents=True, exist_ok=True) + (git_repository / target).write_text("some content") + (git_repository / "symlink0").symlink_to(target) + for i in range(5): + (git_repository / f"symlink{i + 1}").symlink_to(f"symlink{i}") + project = Project(git_repository) + for i in range(1, 6): + assert Path(f"symlink{i}").absolute() not in project.all_files() + + def test_all_files_submodule_is_ignored(submodule_repository): """If a submodule is ignored, all_files should not raise an Exception.""" (submodule_repository / "submodule/foo.py").write_text("foo") @@ -203,6 +346,80 @@ def test_all_files_hg_ignored_contains_newline(hg_repository): assert Path("hello\nworld.pyc").absolute() not in project.all_files() +@posix +def test_all_files_hg_ignore_symlinks_to_covered_files(hg_repository): + """All symlinks to covered files must be ignored.""" + (hg_repository / 
"symlink0").symlink_to("doc/index.rst") + for i in range(5): + (hg_repository / f"symlink{i + 1}").symlink_to(f"symlink{i}") + project = Project(hg_repository) + for i in range(6): + assert Path(f"symlink{i}").absolute() not in project.all_files() + + +hg_params = list( + filter( + lambda x: not (x[0] == "non_existent_file" and x[1] is True), + itertools.product( + [ + ".hg/file_in_dothg", + "../outside_file", + "build/somefile.py", + "non_existent_file", + ], + [False, True], + ), + ) +) + + +@posix +@pytest.mark.parametrize( + "target,create_target", + hg_params, + ids=map(lambda x: f"target={x[0]},create_target={x[1]}", hg_params), +) +def test_all_files_hg_cover_symlinks_to_uncovered_files( + empty_directory, hg_repository, target, create_target +): + """All symlinks to files not covered must be included.""" + hg_repository_target_path = empty_directory / "repository" + shutil.move(hg_repository, hg_repository_target_path) + hg_repository = hg_repository_target_path + if create_target: + (hg_repository / target).parent.mkdir(parents=True, exist_ok=True) + (hg_repository / target).write_text("some content") + (hg_repository / "symlink").symlink_to(target) + project = Project(hg_repository) + assert Path("symlink").absolute() in project.all_files() + + +@posix +@pytest.mark.parametrize( + "target,create_target", + hg_params, + ids=map(lambda x: f"target={x[0]},create_target={x[1]}", hg_params), +) +def test_all_files_hg_ignore_symlinks_to_covered_symlinks( + empty_directory, hg_repository, target, create_target +): + """All symlinks to symlinks that are considered to be covered files must be + ignored. 
+ """ + hg_repository_target_path = empty_directory / "repository" + shutil.move(hg_repository, hg_repository_target_path) + hg_repository = hg_repository_target_path + if create_target: + (hg_repository / target).parent.mkdir(parents=True, exist_ok=True) + (hg_repository / target).write_text("some content") + (hg_repository / "symlink0").symlink_to(target) + for i in range(5): + (hg_repository / f"symlink{i + 1}").symlink_to(f"symlink{i}") + project = Project(hg_repository) + for i in range(1, 6): + assert Path(f"symlink{i}").absolute() not in project.all_files() + + def test_reuse_info_of_file_does_not_exist(fake_repository): """Raise FileNotFoundError when asking for the REUSE info of a file that does not exist. From fcee822a1454c27b1d1c45f117a14bff4f5df5e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ri=C3=9Fe?= Date: Thu, 16 Feb 2023 14:27:55 +0100 Subject: [PATCH 5/8] Add better handling of symlinks --- src/reuse/project.py | 84 ++++++++++++++++++++++++++++---------------- src/reuse/report.py | 7 ++-- src/reuse/vcs.py | 4 +-- 3 files changed, 60 insertions(+), 35 deletions(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index 9f192f9e7..d0089639f 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2023 DB Systel GmbH +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -136,8 +137,27 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]: _LOGGER.debug("ignoring '%s'", the_file) continue if the_file.is_symlink(): - _LOGGER.debug("skipping symlink '%s'", the_file) - continue + # Needs to use os.path.abspath instead of Path.absolute + # since the former normalizes the path, i.e. resolves "..". + # There is no method in pathlib for this which doesn't also + # resolve symlinks recursively, like Path.resolve.
+ target_file = Path( + os.path.abspath(the_file.readlink()) # type: ignore + ) + _LOGGER.debug( + "'%s' is a symlink pointing to '%s'", + the_file, + target_file, + ) + if ( + target_file.is_relative_to( # type: ignore # pylint: disable=E1101 + self.root.resolve() + ) + and (target_file.exists() or target_file.is_symlink()) + and not self._is_path_ignored(target_file) + ): + _LOGGER.debug("skipping symlink '%s'", the_file) + continue # Suppressing this error because I simply don't want to deal # with that here. with contextlib.suppress(OSError): @@ -184,35 +204,39 @@ def reuse_info_of(self, path: StrPath) -> ReuseInfo: dep5_path = source_path # Search the file for REUSE information. - with path.open("rb") as fp: - try: - # Completely read the file once to search for possible snippets - if _contains_snippet(fp): - _LOGGER.debug(f"'{path}' seems to contain a SPDX Snippet") - read_limit = None - else: - read_limit = _HEADER_BYTES - # Reset read position - fp.seek(0) - # Scan the file for REUSE info, possible limiting the read - # length - file_result = extract_reuse_info( - decoded_text_from_binary(fp, size=read_limit) - ) - if file_result: - source_path = str(path) - if path.suffix == ".license": - source_type = SourceType.DOT_LICENSE_FILE + if not path.is_symlink(): + with path.open("rb") as fp: + try: + # Completely read the file once to search for possible + # snippets + if _contains_snippet(fp): + _LOGGER.debug( + f"'{path}' seems to contain a SPDX Snippet" + ) + read_limit = None else: - source_type = SourceType.FILE_HEADER - - except (ExpressionError, ParseError): - _LOGGER.error( - _( - "'{path}' holds an SPDX expression that cannot be" - " parsed, skipping the file" - ).format(path=path) - ) + read_limit = _HEADER_BYTES + # Reset read position + fp.seek(0) + # Scan the file for REUSE info, possibly limiting the read + # length + file_result = extract_reuse_info( + decoded_text_from_binary(fp, size=read_limit) + ) + if file_result: + source_path = str(path) +
if path.suffix == ".license": + source_type = SourceType.DOT_LICENSE_FILE + else: + source_type = SourceType.FILE_HEADER + + except (ExpressionError, ParseError): + _LOGGER.error( + _( + "'{path}' holds an SPDX expression that cannot be" + " parsed, skipping the file" + ).format(path=path) + ) # There is both information in a .dep5 file and in the file header if ( diff --git a/src/reuse/report.py b/src/reuse/report.py index 268e48430..297a8e0a8 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2022 Pietro Albini +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -445,14 +446,14 @@ def generate( ) -> "FileReport": """Generate a FileReport from a path in a Project.""" path = Path(path) - if not path.is_file(): - raise OSError(f"{path} is not a file") + if not path.is_file() and not path.is_symlink(): + raise OSError(f"{path} is not supported") relative = project.relative_from_root(path) report = cls("./" + str(relative), path, do_checksum=do_checksum) # Checksum and ID - if report.do_checksum: + if report.do_checksum and not path.is_symlink(): report.spdxfile.chk_sum = _checksum(path) else: # This path avoids a lot of heavy computation, which is handy for diff --git a/src/reuse/vcs.py b/src/reuse/vcs.py index ec760abea..a22e9de6d 100644 --- a/src/reuse/vcs.py +++ b/src/reuse/vcs.py @@ -100,7 +100,7 @@ def _find_all_ignored_files(self) -> Set[Path]: ] result = execute_command(command, _LOGGER, cwd=self.project.root) all_files = result.stdout.decode("utf-8").split("\0") - return {Path(file_) for file_ in all_files[:-1]} + return {Path(file_) for file_ in all_files[:-1]}.union({Path(".git")}) def is_ignored(self, path: StrPath) -> bool: path = self.project.relative_from_root(path) @@ -168,7 +168,7 @@ def _find_all_ignored_files(self) -> Set[Path]: ] result = 
execute_command(command, _LOGGER, cwd=self.project.root) all_files = result.stdout.decode("utf-8").split("\0") - return {Path(file_) for file_ in all_files[:-1]} + return {Path(file_) for file_ in all_files[:-1]}.union({Path(".hg")}) def is_ignored(self, path: StrPath) -> bool: path = self.project.relative_from_root(path) From 0552ef1c1521f5987c01db10f9ab100f627e8ae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ri=C3=9Fe?= Date: Wed, 21 Jun 2023 14:33:14 +0200 Subject: [PATCH 6/8] Add tests for annotating symlinks --- tests/test_main_annotate.py | 58 +++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/test_main_annotate.py b/tests/test_main_annotate.py index d054fa086..511a821f5 100644 --- a/tests/test_main_annotate.py +++ b/tests/test_main_annotate.py @@ -3,18 +3,27 @@ # SPDX-FileCopyrightText: © 2020 Liferay, Inc. # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later """Tests for reuse._main: annotate""" import logging import stat +from importlib import import_module from inspect import cleandoc import pytest from reuse._main import main +try: + IS_POSIX = bool(import_module("posix")) +except ImportError: + IS_POSIX = False + +posix = pytest.mark.skipif(not IS_POSIX, reason="Windows not supported") + # pylint: disable=too-many-lines,unused-argument @@ -971,6 +980,55 @@ def test_annotate_force_dot_license_doesnt_write_to_file( assert simple_file.read_text() == "Preserve this" +@posix +@pytest.mark.parametrize( + "create_target", + [True, False], + ids=map(lambda x: f"create_target={x}", [True, False]), +) +def test_annotate_force_dot_license_for_symlinks( + fake_repository, stringio, mock_date_today, create_target +): + """Annotating a symlink, broken or not, with --force-dot-license should + result in a .license file next to the symlink. 
+ """ + target_file = fake_repository / "target-file" + if create_target: + target_file.write_text("Preserve this") + symlink = fake_repository / "symlink" + symlink.symlink_to(target_file.relative_to(fake_repository)) + expected = cleandoc( + """ + SPDX-FileCopyrightText: 2018 Jane Doe + + SPDX-License-Identifier: GPL-3.0-or-later + """ + ) + + result = main( + [ + "annotate", + "--license", + "GPL-3.0-or-later", + "--copyright", + "Jane Doe", + "--force-dot-license", + "symlink", + ], + out=stringio, + ) + + assert result == 0 + assert ( + symlink.with_name(f"{symlink.name}.license").read_text().strip() + == expected + ) + if create_target: + assert target_file.read_text() == "Preserve this" + else: + assert not symlink.exists() + + def test_annotate_to_read_only_file_does_not_traceback( fake_repository, stringio, mock_date_today ): From 03f5b437cf5b39d33851f8674b46368b14ffbf77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ri=C3=9Fe?= Date: Wed, 21 Jun 2023 14:54:27 +0200 Subject: [PATCH 7/8] Fix annotating symlinks --- src/reuse/_util.py | 5 +++++ src/reuse/header.py | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/reuse/_util.py b/src/reuse/_util.py index 4b29ff0ae..3d4d83b16 100644 --- a/src/reuse/_util.py +++ b/src/reuse/_util.py @@ -7,6 +7,7 @@ # SPDX-FileCopyrightText: 2022 Pietro Albini # SPDX-FileCopyrightText: 2023 DB Systel GmbH # SPDX-FileCopyrightText: 2023 Johannes Zarl-Zierl +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -476,6 +477,10 @@ def _check_read(self, path: Path) -> None: _("'{}' is not a directory").format(path) ) return + if not path.exists() and path.is_symlink(): + # If the path is a broken symlink we can continue, allowing usage of + # --force-dot-license even if the link target is not readable. 
+ return raise ArgumentTypeError(_("can't open '{}'").format(path)) def _check_write(self, path: Path) -> None: diff --git a/src/reuse/header.py b/src/reuse/header.py index 8725c94f6..07382c802 100644 --- a/src/reuse/header.py +++ b/src/reuse/header.py @@ -9,6 +9,7 @@ # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2022 Yaman Qalieh # SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -392,7 +393,7 @@ def _is_uncommentable(path: Path) -> bool: registered as an UncommentableCommentStyle. """ is_uncommentable = _get_comment_style(path) == UncommentableCommentStyle - return is_uncommentable or is_binary(str(path)) + return is_uncommentable or path.is_symlink() or is_binary(str(path)) def _verify_paths_line_handling( From 13e4fa746b1a7fbb8adae6d6bcd5bb52cb1588de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ri=C3=9Fe?= Date: Thu, 22 Jun 2023 10:21:31 +0200 Subject: [PATCH 8/8] Document changes to the handling of symlinks --- docs/usage.rst | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/usage.rst b/docs/usage.rst index e9c505822..31778cc4b 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -39,7 +39,18 @@ passed as optional argument. Meson subprojects are automatically ignored if ``meson.build`` exists in the project root. ``--include-meson-subprojects`` overrides this behaviour. -Symbolic links and files that are zero-sized are automatically ignored. +Files that are zero-sized are automatically ignored. + +Symbolic links are handled differently depending on the target of the link: + +#. a symlink pointing to a covered file is considered to be the same file as + the covered file and is therefore ignored. +#. a symlink pointing to a file that is not a covered file is itself considered + to be a covered file and is not skipped, unless the symlink is ignored by + other means. 
+ +A "covered file" is the term used in the REUSE Specification to name a file +that needs copyright and licensing information. annotate ========