From 73f1e35f72eda4960f258c6a96aef65aaa97fa1d Mon Sep 17 00:00:00 2001 From: ralbertazzi Date: Sun, 28 May 2023 18:48:54 +0200 Subject: [PATCH] feat: perform a single hash validation of files --- src/poetry/installation/wheel_installer.py | 99 +++++++++++++++++++--- 1 file changed, 88 insertions(+), 11 deletions(-) diff --git a/src/poetry/installation/wheel_installer.py b/src/poetry/installation/wheel_installer.py index 18e42e9cbd9..0afc3c263cb 100644 --- a/src/poetry/installation/wheel_installer.py +++ b/src/poetry/installation/wheel_installer.py @@ -5,9 +5,13 @@ from pathlib import Path from typing import TYPE_CHECKING +from typing import Collection +from typing import Iterable from installer import install from installer.destinations import SchemeDictionaryDestination +from installer.records import RecordEntry +from installer.records import parse_record_file from installer.sources import WheelFile from installer.sources import _WheelFileValidationError @@ -18,7 +22,6 @@ if TYPE_CHECKING: from typing import BinaryIO - from installer.records import RecordEntry from installer.scripts import LauncherKind from installer.utils import Scheme @@ -26,7 +29,26 @@ class WheelDestination(SchemeDictionaryDestination): - """ """ + def __init__( + self, + source: WheelFile, + scheme_dict: dict[str, str], + interpreter: str, + script_kind: LauncherKind, + hash_algorithm: str = "sha256", + bytecode_optimization_levels: Collection[int] = (), + destdir: str | None = None, + ) -> None: + super().__init__( + scheme_dict=scheme_dict, + interpreter=interpreter, + script_kind=script_kind, + hash_algorithm=hash_algorithm, + bytecode_optimization_levels=bytecode_optimization_levels, + destdir=destdir, + ) + self._source = source + self.issues: list[str] = [] def write_to_fs( self, @@ -36,7 +58,6 @@ def write_to_fs( is_executable: bool, ) -> RecordEntry: from installer.records import Hash - from installer.records import RecordEntry from installer.utils import copyfileobj_with_hashing from installer.utils import make_file_executable @@ -66,12 +87,52 @@ def for_source(self, source: WheelFile) -> WheelDestination: scheme_dict["headers"] = str(Path(scheme_dict["headers"]) / source.distribution) return self.__class__( - scheme_dict, + source=source, + scheme_dict=scheme_dict, interpreter=self.interpreter, script_kind=self.script_kind, bytecode_optimization_levels=self.bytecode_optimization_levels, ) + def _validate_hash_and_size( + self, records: Iterable[tuple[Scheme, RecordEntry]] + ) -> None: + record_lines = self._source.read_dist_info("RECORD").splitlines() + record_mapping = { + record[0]: record for record in parse_record_file(record_lines) + } + for item in self._source._zipfile.infolist(): + record_args = record_mapping.pop(item.filename, None) + if not record_args: + continue + + file_record = RecordEntry.from_elements(*record_args) + computed_record = next( + record for _, record in records if record.path == item.filename + ) + if ( + file_record.hash_ is not None + and computed_record.hash_ is not None + and file_record.hash_ != computed_record.hash_ + ) or ( + file_record.size is not None + and computed_record.size is not None + and file_record.size != computed_record.size + ): + self.issues.append( + f"In {self._source._zipfile.filename}, hash / size of" + f" {item.filename} didn't match RECORD" + ) + + def finalize_installation( + self, + scheme: Scheme, + record_file_path: str, + records: Iterable[tuple[Scheme, RecordEntry]], + ) -> None: + self._validate_hash_and_size(records) + return super().finalize_installation(scheme, record_file_path, records) + class WheelInstaller: def __init__(self, env: Env) -> None: @@ -89,26 +150,42 @@ def __init__(self, env: Env) -> None: schemes = self._env.paths schemes["headers"] = schemes["include"] - self._destination = WheelDestination( - schemes, interpreter=str(self._env.python), script_kind=script_kind - ) - + self._script_kind = script_kind + self._schemes = schemes + self._bytecode_compilation_enabled = False self.invalid_wheels: dict[Path, list[str]] = {} def enable_bytecode_compilation(self, enable: bool = True) -> None: - self._destination.bytecode_optimization_levels = (-1,) if enable else () + self._bytecode_compilation_enabled = enable def install(self, wheel: Path) -> None: with WheelFile.open(wheel) as source: + destination = WheelDestination( + source=source, + scheme_dict=self._schemes, + interpreter=str(self._env.python), + script_kind=self._script_kind, + ) + destination.bytecode_optimization_levels = ( + (-1,) if self._bytecode_compilation_enabled else () + ) + destination = destination.for_source(source) try: - source.validate_record() + # Content validation is disabled to avoid performing hash + # computation on files twice. We perform this kind of validation + # while installing the wheel. See _validate_hash_and_size. + source.validate_record(validate_contents=False) except _WheelFileValidationError as e: self.invalid_wheels[wheel] = e.issues install( source=source, - destination=self._destination.for_source(source), + destination=destination, # Additional metadata that is generated by the installation tool. additional_metadata={ "INSTALLER": f"Poetry {__version__}".encode(), }, ) + if destination.issues: + self.invalid_wheels[wheel] = ( + self.invalid_wheels.get(wheel, []) + destination.issues + )