Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix symlink handling #764

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,18 @@ passed as optional argument.
Meson subprojects are automatically ignored if ``meson.build`` exists in the
project root. ``--include-meson-subprojects`` overrides this behaviour.

Symbolic links and files that are zero-sized are automatically ignored.
Files that are zero-sized are automatically ignored.

Symbolic links are handled differently depending on the target of the link:

#. a symlink pointing to a covered file is considered to be the same file as
the covered file and is therefore ignored.
#. a symlink pointing to a file that is not a covered file is itself considered
to be a covered file and is not skipped, unless the symlink is ignored by
other means.

A "covered file" is the term used in the REUSE Specification to name a file
that needs copyright and licensing information.

annotate
========
Expand Down
3 changes: 3 additions & 0 deletions src/reuse/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2021 Alliander N.V.
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand All @@ -26,6 +27,8 @@

from boolean.boolean import Expression

import reuse.compat

try:
__version__ = version("reuse")
except PackageNotFoundError:
Expand Down
5 changes: 5 additions & 0 deletions src/reuse/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# SPDX-FileCopyrightText: 2022 Pietro Albini <[email protected]>
# SPDX-FileCopyrightText: 2023 DB Systel GmbH
# SPDX-FileCopyrightText: 2023 Johannes Zarl-Zierl <[email protected]>
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -476,6 +477,10 @@ def _check_read(self, path: Path) -> None:
_("'{}' is not a directory").format(path)
)
return
if not path.exists() and path.is_symlink():
# If the path is a broken symlink we can continue, allowing usage of
# --force-dot-license even if the link target is not readable.
return
raise ArgumentTypeError(_("can't open '{}'").format(path))

def _check_write(self, path: Path) -> None:
Expand Down
30 changes: 30 additions & 0 deletions src/reuse/compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

"""This module adds compatibility code like backports."""
import os
import sys
from pathlib import Path

# Backport of pathlib.Path.is_relative_to for Python versions before 3.9.
if sys.version_info < (3, 9):

    def _is_relative_to(self: Path, path: Path) -> bool:
        """Return whether *self* can be expressed relative to *path*.

        ``Path.relative_to`` raises ``ValueError`` when it cannot compute a
        relative path; that failure is reported here as ``False``.
        """
        try:
            self.relative_to(path)
        except ValueError:
            return False
        return True

    # Installed via setattr so every Path object gains the method.
    setattr(Path, "is_relative_to", _is_relative_to)

# Backport of pathlib.Path.readlink for Python versions before 3.9.
if sys.version_info < (3, 9):

    def _readlink(self: Path) -> Path:
        """Return the path that the symbolic link *self* points to.

        Only the link itself is read (via ``os.readlink``); the target is
        returned as stored in the link and is not resolved any further.

        The result is built with ``type(self)`` so that it has the same
        class as the link, matching the standard-library implementation
        instead of always returning a plain ``Path``.
        """
        return type(self)(os.readlink(self))

    # Installed via setattr so every Path object gains the method.
    setattr(Path, "readlink", _readlink)
3 changes: 2 additions & 1 deletion src/reuse/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# SPDX-FileCopyrightText: 2022 Florian Snow <[email protected]>
# SPDX-FileCopyrightText: 2022 Yaman Qalieh
# SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker <[email protected]>
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -392,7 +393,7 @@ def _is_uncommentable(path: Path) -> bool:
registered as an UncommentableCommentStyle.
"""
is_uncommentable = _get_comment_style(path) == UncommentableCommentStyle
return is_uncommentable or is_binary(str(path))
return is_uncommentable or path.is_symlink() or is_binary(str(path))


def _verify_paths_line_handling(
Expand Down
84 changes: 54 additions & 30 deletions src/reuse/project.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2022 Florian Snow <[email protected]>
# SPDX-FileCopyrightText: 2023 DB Systel GmbH
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -136,8 +137,27 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]:
_LOGGER.debug("ignoring '%s'", the_file)
continue
if the_file.is_symlink():
_LOGGER.debug("skipping symlink '%s'", the_file)
continue
# Needs to use os.path.abspath instead of Path.absolute
# since the former normalizes the path, i.e. resolves "..".
# There is no method in pathlib for this which doesn't also
# resolve symlinks recursively, like Path.resolve.
target_file = Path(
os.path.abspath(the_file.readlink()) # type: ignore
)
_LOGGER.debug(
"'%s' is a symlink pointing to '%s'",
the_file,
target_file,
)
if (
target_file.is_relative_to( # type: ignore # pylint: disable=E1101
self.root.resolve()
)
and (target_file.exists() or target_file.is_symlink())
and not self._is_path_ignored(target_file)
):
_LOGGER.debug("skipping symlink '%s'", the_file)
continue
# Suppressing this error because I simply don't want to deal
# with that here.
with contextlib.suppress(OSError):
Expand Down Expand Up @@ -184,35 +204,39 @@ def reuse_info_of(self, path: StrPath) -> ReuseInfo:
dep5_path = source_path

# Search the file for REUSE information.
with path.open("rb") as fp:
try:
# Completely read the file once to search for possible snippets
if _contains_snippet(fp):
_LOGGER.debug(f"'{path}' seems to contain a SPDX Snippet")
read_limit = None
else:
read_limit = _HEADER_BYTES
# Reset read position
fp.seek(0)
# Scan the file for REUSE info, possible limiting the read
# length
file_result = extract_reuse_info(
decoded_text_from_binary(fp, size=read_limit)
)
if file_result:
source_path = str(path)
if path.suffix == ".license":
source_type = SourceType.DOT_LICENSE_FILE
if not path.is_symlink():
with path.open("rb") as fp:
try:
# Completely read the file once to search for possible
# snippets
if _contains_snippet(fp):
_LOGGER.debug(
f"'{path}' seems to contain a SPDX Snippet"
)
read_limit = None
else:
source_type = SourceType.FILE_HEADER

except (ExpressionError, ParseError):
_LOGGER.error(
_(
"'{path}' holds an SPDX expression that cannot be"
" parsed, skipping the file"
).format(path=path)
)
read_limit = _HEADER_BYTES
# Reset read position
fp.seek(0)
# Scan the file for REUSE info, possible limiting the read
# length
file_result = extract_reuse_info(
decoded_text_from_binary(fp, size=read_limit)
)
if file_result:
source_path = str(path)
if path.suffix == ".license":
source_type = SourceType.DOT_LICENSE_FILE
else:
source_type = SourceType.FILE_HEADER

except (ExpressionError, ParseError):
_LOGGER.error(
_(
"'{path}' holds an SPDX expression that cannot be"
" parsed, skipping the file"
).format(path=path)
)

# There is both information in a .dep5 file and in the file header
if (
Expand Down
7 changes: 4 additions & 3 deletions src/reuse/report.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2022 Florian Snow <[email protected]>
# SPDX-FileCopyrightText: 2022 Pietro Albini <[email protected]>
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -445,14 +446,14 @@ def generate(
) -> "FileReport":
"""Generate a FileReport from a path in a Project."""
path = Path(path)
if not path.is_file():
raise OSError(f"{path} is not a file")
if not path.is_file() and not path.is_symlink():
raise OSError(f"{path} is not supported")

relative = project.relative_from_root(path)
report = cls("./" + str(relative), path, do_checksum=do_checksum)

# Checksum and ID
if report.do_checksum:
if report.do_checksum and not path.is_symlink():
report.spdxfile.chk_sum = _checksum(path)
else:
# This path avoids a lot of heavy computation, which is handy for
Expand Down
17 changes: 13 additions & 4 deletions src/reuse/vcs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: © 2020 Liferay, Inc. <https://liferay.com>
# SPDX-FileCopyrightText: 2020 John Mulligan <[email protected]>
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -99,11 +100,15 @@ def _find_all_ignored_files(self) -> Set[Path]:
]
result = execute_command(command, _LOGGER, cwd=self.project.root)
all_files = result.stdout.decode("utf-8").split("\0")
return {Path(file_) for file_ in all_files}
return {Path(file_) for file_ in all_files[:-1]}.union({Path(".git")})

def is_ignored(self, path: StrPath) -> bool:
path = self.project.relative_from_root(path)
return path in self._all_ignored_files
return path in self._all_ignored_files or any(
path.is_relative_to(ignored_dir) # type: ignore
for ignored_dir in self._all_ignored_files
if ignored_dir.is_dir()
)

@classmethod
def in_repo(cls, directory: StrPath) -> bool:
Expand Down Expand Up @@ -163,11 +168,15 @@ def _find_all_ignored_files(self) -> Set[Path]:
]
result = execute_command(command, _LOGGER, cwd=self.project.root)
all_files = result.stdout.decode("utf-8").split("\0")
return {Path(file_) for file_ in all_files}
return {Path(file_) for file_ in all_files[:-1]}.union({Path(".hg")})

def is_ignored(self, path: StrPath) -> bool:
path = self.project.relative_from_root(path)
return path in self._all_ignored_files
return path in self._all_ignored_files or any(
path.is_relative_to(ignored_dir) # type: ignore
for ignored_dir in self._all_ignored_files
if ignored_dir.is_dir()
)

@classmethod
def in_repo(cls, directory: StrPath) -> bool:
Expand Down
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2022 Florian Snow <[email protected]>
# SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker <[email protected]>
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -137,6 +138,15 @@ def fake_repository(tmpdir_factory) -> Path:
encoding="utf-8",
)

(directory / "symlink-to-covered").symlink_to(directory / "doc/index.rst")
(directory / "symlink-to-not-covered").symlink_to(directory)
(directory / "symlink-to-not-covered.license").write_text(
"# SPDX-FileCopyrightText: 2017 Jane Doe\n"
"#\n"
"# SPDX-License-Identifier: GPL-3.0-or-later",
encoding="utf-8",
)

os.chdir(directory)
return directory

Expand Down
58 changes: 58 additions & 0 deletions tests/test_main_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,27 @@
# SPDX-FileCopyrightText: © 2020 Liferay, Inc. <https://liferay.com>
# SPDX-FileCopyrightText: 2022 Florian Snow <[email protected]>
# SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker <[email protected]>
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

"""Tests for reuse._main: annotate"""
import logging
import stat
from importlib import import_module
from inspect import cleandoc

import pytest

from reuse._main import main

# The ``posix`` module can only be imported on POSIX platforms, so a
# successful import is used as the platform check.
try:
    import_module("posix")
except ImportError:
    IS_POSIX = False
else:
    IS_POSIX = True

# Marker that skips a test when the ``posix`` module is unavailable.
posix = pytest.mark.skipif(not IS_POSIX, reason="Windows not supported")

# pylint: disable=too-many-lines,unused-argument


Expand Down Expand Up @@ -971,6 +980,55 @@ def test_annotate_force_dot_license_doesnt_write_to_file(
assert simple_file.read_text() == "Preserve this"


@posix
@pytest.mark.parametrize(
    "create_target",
    [True, False],
    ids=[f"create_target={flag}" for flag in (True, False)],
)
def test_annotate_force_dot_license_for_symlinks(
    fake_repository, stringio, mock_date_today, create_target
):
    """Annotating a symlink with --force-dot-license, whether the link is
    broken or not, must produce a .license file next to the symlink and
    leave the link target alone.
    """
    target_file = fake_repository / "target-file"
    symlink = fake_repository / "symlink"
    license_file = symlink.with_name(f"{symlink.name}.license")

    # Without a target file the symlink created below is a broken link.
    if create_target:
        target_file.write_text("Preserve this")
    symlink.symlink_to(target_file.relative_to(fake_repository))

    expected = cleandoc(
        """
        SPDX-FileCopyrightText: 2018 Jane Doe

        SPDX-License-Identifier: GPL-3.0-or-later
        """
    )
    argv = [
        "annotate",
        "--license",
        "GPL-3.0-or-later",
        "--copyright",
        "Jane Doe",
        "--force-dot-license",
        "symlink",
    ]

    result = main(argv, out=stringio)

    assert result == 0
    assert license_file.read_text().strip() == expected
    if not create_target:
        # The link must still be broken: no target may have been created.
        assert not symlink.exists()
    else:
        # The target's content must not have been touched.
        assert target_file.read_text() == "Preserve this"


def test_annotate_to_read_only_file_does_not_traceback(
fake_repository, stringio, mock_date_today
):
Expand Down
Loading