Develop a retry mechanism for fetching artifacts in bump-recipe (#261)

* First pass at retry mechanism for artifact fetching

* Allows for a user-customizable HTTP retry interval (see the sketch after this list). Fixes a handful of logging messages

* Improves logging messages

* Adds initial bump-recipe retry unit test

* Adds retry test for recipes with hash variables

* Adds retry unit test for multi-sourced recipes
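
A minimal, self-contained sketch of the retry pattern this commit introduces in `bump_recipe.py`: the delay grows linearly with the attempt number, and the constants mirror the ones added in the diff below. `FetchError` here is a stand-in for `conda_recipe_manager.fetcher.exceptions.FetchError`.

```python
import time
from typing import Callable


class FetchError(Exception):
    """Stand-in for conda_recipe_manager.fetcher.exceptions.FetchError."""


_RETRY_LIMIT = 5              # maximum number of fetch attempts
_DEFAULT_RETRY_INTERVAL = 30  # seconds; multiplied by the attempt number


def fetch_with_backoff(fetch: Callable[[], None], retry_interval: float = _DEFAULT_RETRY_INTERVAL) -> None:
    """Retries `fetch()` with a linearly growing delay: 30 s, 60 s, 90 s, ... by default."""
    for attempt in range(1, _RETRY_LIMIT + 1):
        try:
            fetch()
            return
        except FetchError:
            time.sleep(attempt * retry_interval)
    raise FetchError(f"Failed to fetch after {_RETRY_LIMIT} attempts.")
```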
schuylermartin45 authored Dec 3, 2024
1 parent 54c7f2b commit 2a446b6
Showing 8 changed files with 306 additions and 28 deletions.
129 changes: 104 additions & 25 deletions conda_recipe_manager/commands/bump_recipe.py
@@ -6,6 +6,7 @@

import logging
import sys
import time
from pathlib import Path
from typing import Final, Optional, cast

@@ -14,11 +15,13 @@
from conda_recipe_manager.commands.utils.types import ExitCode
from conda_recipe_manager.fetcher.artifact_fetcher import from_recipe as af_from_recipe
from conda_recipe_manager.fetcher.base_artifact_fetcher import BaseArtifactFetcher
from conda_recipe_manager.fetcher.exceptions import FetchError
from conda_recipe_manager.fetcher.http_artifact_fetcher import HttpArtifactFetcher
from conda_recipe_manager.parser.recipe_parser import RecipeParser
from conda_recipe_manager.types import JsonPatchType

log = logging.getLogger(__name__)
# Truncates the `__name__` to the crm command name.
log = logging.getLogger(__name__.rsplit(".", maxsplit=1)[-1])

## Constants ##

@@ -37,6 +40,12 @@ class RecipePaths:
# Common variable names used for source artifact hashes.
_COMMON_HASH_VAR_NAMES: Final[set[str]] = {"sha256", "hash", "hash_val", "hash_value"}

# Maximum number of retries to attempt when trying to fetch an external artifact.
_RETRY_LIMIT: Final[int] = 5
# How much longer (in seconds) we should wait per retry.
_DEFAULT_RETRY_INTERVAL: Final[int] = 30


## Functions ##


@@ -56,6 +65,21 @@ def _validate_target_version(ctx: click.Context, param: str, value: str) -> str:
return value


def _validate_retry_interval(ctx: click.Context, param: str, value: float) -> float: # pylint: disable=unused-argument
"""
Provides additional input validation on the retry interval
:param ctx: Click's context object
:param param: Argument parameter name
:param value: Target value to validate
:raises click.BadParameter: In the event the input is not valid.
:returns: The value of the argument, if valid.
"""
if value <= 0:
raise click.BadParameter("The retry interval must be a positive, non-zero floating-point value.")
return value


def _exit_on_failed_patch(recipe_parser: RecipeParser, patch_blob: JsonPatchType) -> None:
"""
Convenience function that exits the program when a patch operation fails. This standardizes how we handle patch
@@ -72,6 +96,16 @@ def _exit_on_failed_patch(recipe_parser: RecipeParser, patch_blob: JsonPatchType
sys.exit(ExitCode.PATCH_ERROR)


def _exit_on_failed_fetch(fetcher: BaseArtifactFetcher) -> None:
"""
Exits the script upon a failed fetch.
:param fetcher: ArtifactFetcher instance used in the fetch attempt.
"""
log.error("Failed to fetch `%s` after %s retries.", fetcher, _RETRY_LIMIT)
sys.exit(ExitCode.HTTP_ERROR)


def _pre_process_cleanup(recipe_content: str) -> str:
"""
Performs some recipe clean-up tasks before parsing the recipe file. This should correct common issues and improve
@@ -142,40 +176,45 @@ def _update_version(recipe_parser: RecipeParser, target_version: str) -> None:
_exit_on_failed_patch(recipe_parser, {"op": op, "path": RecipePaths.VERSION, "value": target_version})


def _get_sha256(fetcher: HttpArtifactFetcher) -> str:
def _get_sha256(fetcher: HttpArtifactFetcher, retry_interval: float) -> str:
"""
Wrapping function that attempts to retrieve an HTTP/HTTPS artifact with a retry mechanism.
:param fetcher: Artifact fetching instance to use.
:param retry_interval: Scalable interval between fetch requests.
:raises FetchError: If an issue occurred while downloading or extracting the archive.
:returns: The SHA-256 hash of the artifact, if it was able to be downloaded.
"""
# TODO retry mechanism, and log attempts
# TODO attempt fetch in the background, especially if multiple fetch() calls are required.
fetcher.fetch()
return fetcher.get_archive_sha256()


def _update_sha256(recipe_parser: RecipeParser) -> None:
# NOTE: This is the most I/O-bound operation in `bump-recipe` by a country mile. At the time of writing,
# running this operation in the background will not make any significant improvements to performance. Every other
# operation is so fast in comparison, any gains would likely be lost with the additional overhead. This op is
# also inherently reliant on having the version change performed ahead of time. In addition, parallelizing the
# retries defeats the point of having a back-off timer.
for retry_id in range(1, _RETRY_LIMIT + 1):
try:
log.info("Fetching artifact `%s`, attempt #%d", fetcher, retry_id)
fetcher.fetch()
return fetcher.get_archive_sha256()
except FetchError:
time.sleep(retry_id * retry_interval)
raise FetchError(f"Failed to fetch `{fetcher}` after {_RETRY_LIMIT} retries.")


def _update_sha256_check_hash_var(
recipe_parser: RecipeParser, retry_interval: float, fetcher_tbl: dict[str, BaseArtifactFetcher]
) -> bool:
"""
Attempts to update the SHA-256 hash(s) in the `/source` section of a recipe file, if applicable. Note that this is
only required for build artifacts that are hosted as compressed software archives. If this field must be updated,
a lengthy network request may be required to calculate the new hash.
NOTE: For this to make any meaningful changes, the `version` field will need to be updated first.
Helper function that checks whether the SHA-256 hash is stored in a variable. If it is, the update is performed on that variable.
:param recipe_parser: Recipe file to update.
:param retry_interval: Scalable interval between fetch requests.
:param fetcher_tbl: Table of artifact source locations to corresponding ArtifactFetcher instances.
:returns: True if `_update_sha256()` should return early. False otherwise.
"""
fetcher_tbl = af_from_recipe(recipe_parser, True)
if not fetcher_tbl:
log.warning("`/source` is missing or does not contain a supported source type.")
return

# Check to see if the SHA-256 hash might be set in a variable. In extremely rare cases, we log warnings to indicate
# that the "correct" action is unclear and likely requires human intervention. Otherwise, if we see a hash variable
# and it is used by a single source, we will edit the variable directly.
hash_vars_set: Final[set[str]] = _COMMON_HASH_VAR_NAMES & set(recipe_parser.list_variables())

if len(hash_vars_set) == 1 and len(fetcher_tbl) == 1:
hash_var: Final[str] = next(iter(hash_vars_set))
src_fetcher: Final[Optional[BaseArtifactFetcher]] = fetcher_tbl.get(RecipePaths.SOURCE, None)
@@ -186,8 +225,11 @@ def _update_sha256(recipe_parser: RecipeParser) -> None:
# NOTE: This is a linear search on a small list.
and RecipePaths.SINGLE_SHA_256 in recipe_parser.get_variable_references(hash_var)
):
recipe_parser.set_variable(hash_var, _get_sha256(src_fetcher))
return
try:
recipe_parser.set_variable(hash_var, _get_sha256(src_fetcher, retry_interval))
except FetchError:
_exit_on_failed_fetch(src_fetcher)
return True

log.warning(
(
@@ -201,17 +243,43 @@ def _update_sha256(recipe_parser: RecipeParser) -> None:
"Multiple commonly used hash variables detected. Hash values will be changed directly in `/source` keys."
)

return False


def _update_sha256(recipe_parser: RecipeParser, retry_interval: float) -> None:
"""
Attempts to update the SHA-256 hash(s) in the `/source` section of a recipe file, if applicable. Note that this is
only required for build artifacts that are hosted as compressed software archives. If this field must be updated,
a lengthy network request may be required to calculate the new hash.
NOTE: For this to make any meaningful changes, the `version` field will need to be updated first.
:param recipe_parser: Recipe file to update.
:param retry_interval: Scalable interval between fetch requests.
"""
fetcher_tbl = af_from_recipe(recipe_parser, True)
if not fetcher_tbl:
log.warning("`/source` is missing or does not contain a supported source type.")
return

if _update_sha256_check_hash_var(recipe_parser, retry_interval, fetcher_tbl):
return

# NOTE: Each source _might_ have a different SHA-256 hash. This is the case for the `cctools-ld64` feedstock. That
# project has a different implementation per architecture. However, in other circumstances, mirrored sources with
# different hashes might imply there is a security threat. We will log some statistics so the user can best decide
# what to do.
unique_hashes: set[str] = set()
total_hash_cntr = 0
# TODO parallelize requests for multiple sources
for src_path, fetcher in fetcher_tbl.items():
if not isinstance(fetcher, HttpArtifactFetcher):
continue

sha = _get_sha256(fetcher)
try:
sha = _get_sha256(fetcher, retry_interval)
except FetchError:
_exit_on_failed_fetch(fetcher)
total_hash_cntr += 1
unique_hashes.add(sha)
sha_path = RecipeParser.append_to_path(src_path, "/sha256")
@@ -248,7 +316,18 @@ def _update_sha256(recipe_parser: RecipeParser) -> None:
callback=_validate_target_version,
help="New project version to target. Required if `--build-num` is NOT specified.",
)
def bump_recipe(recipe_file_path: str, build_num: bool, target_version: Optional[str]) -> None:
@click.option(
"-i",
"--retry-interval",
default=_DEFAULT_RETRY_INTERVAL,
type=float,
callback=_validate_retry_interval,
help=(
"Retry interval (in seconds) for network requests. Scales with number of failed attempts."
f" Defaults to {_DEFAULT_RETRY_INTERVAL} seconds"
),
)
def bump_recipe(recipe_file_path: str, build_num: bool, target_version: Optional[str], retry_interval: float) -> None:
"""
Bumps a recipe to a new version.
@@ -286,7 +365,7 @@ def bump_recipe(recipe_file_path: str, build_num: bool, target_version: Optional

# Version must be updated before hash to ensure the correct artifact is hashed.
_update_version(recipe_parser, target_version)
_update_sha256(recipe_parser)
_update_sha256(recipe_parser, retry_interval)

Path(recipe_file_path).write_text(recipe_parser.render(), encoding="utf-8")
sys.exit(ExitCode.SUCCESS)
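
Taken together, the new `--retry-interval`/`-i` option and the retry loop above mean the sleep after the n-th failed fetch attempt is n × interval seconds (30 s, 60 s, 90 s, ... with the default of 30). A minimal usage sketch, assuming the command is driven programmatically through Click's `CliRunner` as in the project's tests; the recipe path and target version are hypothetical placeholders.

```python
from click.testing import CliRunner

from conda_recipe_manager.commands import bump_recipe
from conda_recipe_manager.commands.utils.types import ExitCode

runner = CliRunner()
# Bump to a new version, waiting 10 s, 20 s, 30 s, ... between failed fetch
# attempts instead of the default 30 s, 60 s, 90 s, ...
result = runner.invoke(
    bump_recipe.bump_recipe,
    ["-t", "1.2.3", "--retry-interval", "10", "recipe/meta.yaml"],  # hypothetical recipe path
)
# ExitCode.SUCCESS when the bump succeeds; ExitCode.HTTP_ERROR if every retry is exhausted.
print(result.exit_code)
```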
2 changes: 1 addition & 1 deletion conda_recipe_manager/commands/conda_recipe_manager.py
@@ -30,7 +30,7 @@ def conda_recipe_manager(verbose: bool) -> None:
"""
# Initialize the logger, available to all CRM commands.
logging.basicConfig(
format="[%(levelname)s] [%(name)s]: %(message)s",
format="%(asctime)s [%(levelname)s] [%(name)s]: %(message)s",
level=logging.DEBUG if verbose else logging.INFO,
)

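
Combined with the logger-name truncation in `bump_recipe.py`, the `%(asctime)s`-prefixed format gives each record a timestamp and a short command name. A small sketch of the effect, assuming the default `asctime` date format; the sample output line and artifact name are illustrative only.

```python
import logging

# The same format string the CLI entry point now configures.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] [%(name)s]: %(message)s",
    level=logging.INFO,
)
log = logging.getLogger("bump_recipe")  # mirrors the truncated `__name__`
log.info("Fetching artifact `%s`, attempt #%d", "example-artifact", 1)
# Illustrative output:
#   2024-12-03 09:00:00,000 [INFO] [bump_recipe]: Fetching artifact `example-artifact`, attempt #1
```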
1 change: 1 addition & 0 deletions conda_recipe_manager/commands/utils/types.py
@@ -37,6 +37,7 @@ class ExitCode(IntEnum):

# bump-recipe
PATCH_ERROR = 107
HTTP_ERROR = 108

## rattler-bulk-build ##
# NOTE: There may be overlap with rattler-build
8 changes: 8 additions & 0 deletions conda_recipe_manager/fetcher/base_artifact_fetcher.py
@@ -52,6 +52,14 @@ def _fetch_guard(self, msg: str) -> None:
return
raise FetchRequiredError(msg)

def __str__(self) -> str:
"""
Returns a simple string identifier that identifies an ArtifactFetcher instance.
:returns: String identifier (name) of the ArtifactFetcher.
"""
return self._name

@abstractmethod
def fetch(self) -> None:
"""
37 changes: 35 additions & 2 deletions tests/commands/test_bump_recipe.py
@@ -136,8 +136,8 @@ def test_bump_recipe_cli(
runner = CliRunner()
fs.add_real_directory(get_test_path(), read_only=False)

recipe_file_path = get_test_path() / recipe_file
expected_recipe_file_path = get_test_path() / expected_recipe_file
recipe_file_path: Final[Path] = get_test_path() / recipe_file
expected_recipe_file_path: Final[Path] = get_test_path() / expected_recipe_file

cli_args: Final[list[str]] = (
["--build-num", str(recipe_file_path)] if version is None else ["-t", version, str(recipe_file_path)]
@@ -153,6 +153,39 @@ def test_bump_recipe_cli(
assert result.exit_code == ExitCode.SUCCESS


@pytest.mark.parametrize(
"recipe_file,version,expected_retries",
[
("bump_recipe/types-toml_bad_url.yaml", "0.10.8.20240310", 5),
("bump_recipe/types-toml_bad_url_hash_var.yaml", "0.10.8.20240310", 5),
# As of writing, the script will fail on the first URL that fails to be fetched, thus the count is half what
# one might expect.
("bump_recipe/types-toml_bad_url_multi_source.yaml", "0.10.8.20240310", 5),
# TODO validate V1 recipe files
],
)
def test_bump_recipe_http_retry_mechanism(
fs: FakeFilesystem, recipe_file: str, version: str, expected_retries: int
) -> None:
"""
Ensures that the recipe retry mechanism is used in the event the source artifact URLs are unreachable.
:param fs: `pyfakefs` Fixture used to replace the file system
:param recipe_file: Target recipe file to update
:param version: Version to bump to
:param expected_retries: Expected number of retries that should have been attempted
"""

runner = CliRunner()
fs.add_real_directory(get_test_path(), read_only=False)
recipe_file_path: Final[Path] = get_test_path() / recipe_file
with patch("requests.get") as mocker:
result = runner.invoke(bump_recipe.bump_recipe, ["-t", version, "-i", "0.01", str(recipe_file_path)])
assert mocker.call_count == expected_retries

assert result.exit_code == ExitCode.HTTP_ERROR


def test_bump_recipe_exits_if_target_version_missing() -> None:
"""
Ensures that the `--target-version` flag is required when `--build-num` is NOT provided.
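
`test_bump_recipe_http_retry_mechanism` above keeps the scaled back-off negligible by passing `-i 0.01`. An alternative sketch, reusing that test's local names, would be to stub `time.sleep` as well so the retry loop runs with no real delay; the extra assertion on the sleep count is an assumption based on the retry loop shown earlier in this diff.

```python
# Inside test_bump_recipe_http_retry_mechanism(), the `with` block could also patch the sleep:
with patch("requests.get") as mock_get, patch("time.sleep") as mock_sleep:
    result = runner.invoke(bump_recipe.bump_recipe, ["-t", version, str(recipe_file_path)])

assert mock_get.call_count == expected_retries    # one `requests.get` call per attempt
assert mock_sleep.call_count == expected_retries  # one scaled sleep per failed attempt
assert result.exit_code == ExitCode.HTTP_ERROR
```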
51 changes: 51 additions & 0 deletions tests/test_aux_files/bump_recipe/types-toml_bad_url.yaml
@@ -0,0 +1,51 @@
{% set name = "types-toml" %}
{% set version = "0.10.8.6" %}

package:
name: {{ name|lower }}
version: {{ version }}

source:
url: https://fake.website.total.bullshit/artifact.tar.gz
sha256: 6d3ac79e36c9ee593c5d4fb33a50cca0e3adceb6ef5cff8b8e5aef67b4c4aaf2

build:
number: 0
skip: true # [py<37]
script: {{ PYTHON }} -m pip install . -vv --no-deps --no-build-isolation

requirements:
host:
- setuptools
- wheel
- pip
- python
run:
- python

test:
imports:
- types
requires:
- pip
commands:
- pip check
- test -f $SP_DIR/toml-stubs/__init__.pyi # [unix]

about:
home: https://github.com/python/typeshed
summary: Typing stubs for toml
description: |
This is a PEP 561 type stub package for the toml package.
It can be used by type-checking tools like mypy, pyright,
pytype, PyCharm, etc. to check code that uses toml.
license: Apache-2.0 AND MIT
license_file: LICENSE
license_family: OTHER
dev_url: https://github.com/python/typeshed
doc_url: https://pypi.org/project/types-toml/

extra:
recipe-maintainers:
- fhoehle
- conda-forge/mypy