diff --git a/.gitignore b/.gitignore index d9e50d3989..c605f43847 100644 --- a/.gitignore +++ b/.gitignore @@ -18,7 +18,6 @@ env/ build/ develop-eggs/ dist/ -downloads/ eggs/ .eggs/ lib64/ @@ -118,5 +117,9 @@ pip-wheel-metadata # Textual snapshot_report.html +# Nextflow inspect +tests/pipelines/null +tests/pipelines/.nextflow + # AI CLAUDE.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 91c31154dc..91ff936b1d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,12 @@ - Update pre-commit hook pre-commit/mirrors-mypy to v1.17.1 ([#3698](https://github.com/nf-core/tools/pull/3698)) - Update python:3.13-slim Docker digest to 4c2cf99 ([#3700](https://github.com/nf-core/tools/pull/3700)) - Validation of meta.yaml in cross-org repos ([#3680](https://github.com/nf-core/tools/pull/3680)) +- Refactor downloads command ([#3634](https://github.com/nf-core/tools/pull/3634)) + - Split `download.py` into subdirectory `download/` + - Use `nextflow inspect` for container discovery and remove legacy regex container discovery (requires Nextflow >= 25.04.04) + - Add support for downloading docker images into tar archives + - Change long flag `--parallel-downloads` to `--parallel`. Short flag remains `-d`. + - Add pipeline to test data to be compatible with `nextflow inspect` - Replace arm profile with arm64 and emulate_amd64 profiles ([#3689](https://github.com/nf-core/tools/pull/3689)) - Update test-datasets list subcommand to output plain text urls and paths for easy copying [#3720](https://github.com/nf-core/tools/pull/3720) - Remove workflow.trace from nf-test snapshot ([#3721](https://github.com/nf-core/tools/pull/3721)) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 6b51306a00..a07be46990 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -61,7 +61,7 @@ ) from nf_core.components.components_completion import autocomplete_modules, autocomplete_subworkflows from nf_core.components.constants import NF_CORE_MODULES_REMOTE -from nf_core.pipelines.download import DownloadError +from nf_core.pipelines.download.download import DownloadError from nf_core.pipelines.list import autocomplete_pipelines from nf_core.utils import check_if_outdated, nfcore_logo, rich_force_colors, setup_nfcore_dir @@ -400,33 +400,33 @@ def command_pipelines_lint( @click.option( "-s", "--container-system", - type=click.Choice(["none", "singularity"]), + type=click.Choice(["none", "singularity", "docker"]), help="Download container images of required software.", ) @click.option( "-l", "--container-library", multiple=True, - help="Container registry/library or mirror to pull images from.", + help="Container registry/library or mirror to pull images from. Not available for Docker containers.", ) @click.option( "-u", "--container-cache-utilisation", type=click.Choice(["amend", "copy", "remote"]), - help="Utilise a `singularity.cacheDir` in the download process, if applicable.", + help="Utilise a `singularity.cacheDir` in the download process, if applicable. Not available for Docker containers.", ) @click.option( "-i", "--container-cache-index", type=str, - help="List of images already available in a remote `singularity.cacheDir`.", + help="List of images already available in a remote `singularity.cacheDir`. 
Not available for Docker containers.", ) @click.option( "-d", "--parallel-downloads", type=int, default=4, - help="Number of parallel image downloads", + help="Number of allowed parallel tasks", ) @click.pass_context def command_pipelines_download( @@ -2360,7 +2360,7 @@ def command_create_params_file(pipeline, revision, output, force, show_hidden): @click.option( "-s", "--container-system", - type=click.Choice(["none", "singularity"]), + type=click.Choice(["none", "singularity", "docker"]), help="Download container images of required software.", ) @click.option( diff --git a/nf_core/commands_pipelines.py b/nf_core/commands_pipelines.py index c4eda78b11..9061d50b55 100644 --- a/nf_core/commands_pipelines.py +++ b/nf_core/commands_pipelines.py @@ -199,6 +199,7 @@ def pipelines_download( container_cache_utilisation, container_cache_index, parallel_downloads, + ctx.obj["hide_progress"], ) dl.download_workflow() diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py deleted file mode 100644 index 677e46b2e4..0000000000 --- a/nf_core/pipelines/download.py +++ /dev/null @@ -1,1990 +0,0 @@ -"""Downloads a nf-core pipeline to the local file system.""" - -import concurrent.futures -import io -import logging -import os -import re -import shutil -import subprocess -import tarfile -import textwrap -from datetime import datetime -from pathlib import Path -from typing import Any, Optional -from zipfile import ZipFile - -import git -import questionary -import requests -import requests_cache -import rich -import rich.progress -from git.exc import GitCommandError, InvalidGitRepositoryError -from packaging.version import Version - -import nf_core -import nf_core.modules.modules_utils -import nf_core.pipelines.list -import nf_core.utils -from nf_core.synced_repo import RemoteProgressbar, SyncedRepo -from nf_core.utils import ( - NFCORE_CACHE_DIR, - NFCORE_DIR, - SingularityCacheFilePathValidator, -) - -log = logging.getLogger(__name__) -stderr = rich.console.Console( - stderr=True, - style="dim", - highlight=False, - force_terminal=nf_core.utils.rich_force_colors(), -) - - -class DownloadError(RuntimeError): - """A custom exception that is raised when nf-core pipelines download encounters a problem that we already took into consideration. - In this case, we do not want to print the traceback, but give the user some concise, helpful feedback instead. - """ - - -class DownloadProgress(rich.progress.Progress): - """Custom Progress bar class, allowing us to have two progress - bars with different columns / layouts. - """ - - def get_renderables(self): - for task in self.tasks: - if task.fields.get("progress_type") == "summary": - self.columns = ( - "[magenta]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>3.0f}%", - "•", - "[green]{task.completed}/{task.total} completed", - ) - if task.fields.get("progress_type") == "download": - self.columns = ( - "[blue]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>3.1f}%", - "•", - rich.progress.DownloadColumn(), - "•", - rich.progress.TransferSpeedColumn(), - ) - if task.fields.get("progress_type") == "singularity_pull": - self.columns = ( - "[magenta]{task.description}", - "[blue]{task.fields[current_log]}", - rich.progress.BarColumn(bar_width=None), - ) - yield self.make_tasks_table([task]) - - -class DownloadWorkflow: - """Downloads a nf-core workflow from GitHub to the local file system. 
- - Can also download its Singularity container image if required. - - Args: - pipeline (str): A nf-core pipeline name. - revision (List[str]): The workflow revision(s) to download, like `1.0` or `dev` . Defaults to None. - outdir (str): Path to the local download directory. Defaults to None. - compress_type (str): Type of compression for the downloaded files. Defaults to None. - force (bool): Flag to force download even if files already exist (overwrite existing files). Defaults to False. - platform (bool): Flag to customize the download for Seqera Platform (convert to git bare repo). Defaults to False. - download_configuration (str): Download the configuration files from nf-core/configs. Defaults to None. - tag (List[str]): Specify additional tags to add to the downloaded pipeline. Defaults to None. - container_system (str): The container system to use (e.g., "singularity"). Defaults to None. - container_library (List[str]): The container libraries (registries) to use. Defaults to None. - container_cache_utilisation (str): If a local or remote cache of already existing container images should be considered. Defaults to None. - container_cache_index (str): An index for the remote container cache. Defaults to None. - parallel_downloads (int): The number of parallel downloads to use. Defaults to 4. - """ - - def __init__( - self, - pipeline=None, - revision=None, - outdir=None, - compress_type=None, - force=False, - platform=False, - download_configuration=None, - additional_tags=None, - container_system=None, - container_library=None, - container_cache_utilisation=None, - container_cache_index=None, - parallel_downloads=4, - ): - self.pipeline = pipeline - if isinstance(revision, str): - self.revision = [revision] - elif isinstance(revision, tuple): - self.revision = [*revision] - else: - self.revision = [] - self.outdir = outdir - self.output_filename = None - self.compress_type = compress_type - self.force = force - self.platform = platform - self.fullname: Optional[str] = None - # downloading configs is not supported for Seqera Platform downloads. - self.include_configs = True if download_configuration == "yes" and not bool(platform) else False - # Additional tags to add to the downloaded pipeline. This enables to mark particular commits or revisions with - # additional tags, e.g. "stable", "testing", "validated", "production" etc. Since this requires a git-repo, it is only - # available for the bare / Seqera Platform download. - if isinstance(additional_tags, str) and bool(len(additional_tags)) and self.platform: - self.additional_tags = [additional_tags] - elif isinstance(additional_tags, tuple) and bool(len(additional_tags)) and self.platform: - self.additional_tags = [*additional_tags] - else: - self.additional_tags = None - # Specifying a cache index or container library implies that containers should be downloaded. 
- self.container_system = "singularity" if container_cache_index or bool(container_library) else container_system - # Manually specified container library (registry) - if isinstance(container_library, str) and bool(len(container_library)): - self.container_library = [container_library] - elif isinstance(container_library, tuple) and bool(len(container_library)): - self.container_library = [*container_library] - else: - self.container_library = ["quay.io"] - # Create a new set and add all values from self.container_library (CLI arguments to --container-library) - self.registry_set = set(self.container_library) if hasattr(self, "container_library") else set() - # if a container_cache_index is given, use the file and overrule choice. - self.container_cache_utilisation = "remote" if container_cache_index else container_cache_utilisation - self.container_cache_index = container_cache_index - # allows to specify a container library / registry or a respective mirror to download images from - self.parallel_downloads = parallel_downloads - - self.wf_revisions = [] - self.wf_branches: dict[str, Any] = {} - self.wf_sha = {} - self.wf_download_url = {} - self.nf_config = {} - self.containers = [] - self.containers_remote = [] # stores the remote images provided in the file. - - # Fetch remote workflows - self.wfs = nf_core.pipelines.list.Workflows() - self.wfs.get_remote_workflows() - - def download_workflow(self): - """Starts a nf-core workflow download.""" - - # Get workflow details - try: - self.prompt_pipeline_name() - self.pipeline, self.wf_revisions, self.wf_branches = nf_core.utils.get_repo_releases_branches( - self.pipeline, self.wfs - ) - self.prompt_revision() - self.get_revision_hash() - # Inclusion of configs is unnecessary for Seqera Platform. - if not self.platform and self.include_configs is None: - self.prompt_config_inclusion() - # If a remote cache is specified, it is safe to assume images should be downloaded. - if not self.container_cache_utilisation == "remote": - self.prompt_container_download() - else: - self.container_system = "singularity" - self.prompt_singularity_cachedir_creation() - self.prompt_singularity_cachedir_utilization() - self.prompt_singularity_cachedir_remote() - # Nothing meaningful to compress here. - if not self.platform: - self.prompt_compression_type() - except AssertionError as e: - raise DownloadError(e) from e - - summary_log = [ - f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0] + ',[' + str(len(self.revision) - 2) + ' more revisions],' + self.revision[-1]}'", - f"Use containers: '{self.container_system}'", - ] - if self.container_system: - summary_log.append(f"Container library: '{', '.join(self.container_library)}'") - if self.container_system == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: - summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}'") - if self.containers_remote: - summary_log.append( - f"Successfully read {len(self.containers_remote)} containers from the remote '$NXF_SINGULARITY_CACHEDIR' contents." 
- ) - - # Set an output filename now that we have the outdir - if self.platform: - self.output_filename = f"{self.outdir}.git" - summary_log.append(f"Output file: '{self.output_filename}'") - elif self.compress_type is not None: - self.output_filename = f"{self.outdir}.{self.compress_type}" - summary_log.append(f"Output file: '{self.output_filename}'") - else: - summary_log.append(f"Output directory: '{self.outdir}'") - - if not self.platform: - # Only show entry, if option was prompted. - summary_log.append(f"Include default institutional configuration: '{self.include_configs}'") - else: - summary_log.append(f"Enabled for Seqera Platform: '{self.platform}'") - - # Check that the outdir doesn't already exist - if self.outdir is not None and os.path.exists(self.outdir): - if not self.force: - raise DownloadError( - f"Output directory '{self.outdir}' already exists (use [red]--force[/] to overwrite)" - ) - log.warning(f"Deleting existing output directory: '{self.outdir}'") - shutil.rmtree(self.outdir) - - # Check that compressed output file doesn't already exist - if self.output_filename and os.path.exists(self.output_filename): - if not self.force: - raise DownloadError( - f"Output file '{self.output_filename}' already exists (use [red]--force[/] to overwrite)" - ) - log.warning(f"Deleting existing output file: '{self.output_filename}'") - os.remove(self.output_filename) - - # Summary log - log.info("Saving '{}'\n {}".format(self.pipeline, "\n ".join(summary_log))) - - # Perform the actual download - if self.platform: - self.download_workflow_platform() - else: - self.download_workflow_static() - - def download_workflow_static(self): - """Downloads a nf-core workflow from GitHub to the local file system in a self-contained manner.""" - - # Download the centralised configs first - if self.include_configs: - log.info("Downloading centralised configs from GitHub") - self.download_configs() - - # Download the pipeline files for each selected revision - log.info("Downloading workflow files from GitHub") - - for item in zip(self.revision, self.wf_sha.values(), self.wf_download_url.values()): - revision_dirname = self.download_wf_files(revision=item[0], wf_sha=item[1], download_url=item[2]) - - if self.include_configs: - try: - self.wf_use_local_configs(revision_dirname) - except FileNotFoundError as e: - raise DownloadError("Error editing pipeline config file to use local configs!") from e - - # Collect all required singularity images - if self.container_system == "singularity": - self.find_container_images(os.path.join(self.outdir, revision_dirname)) - self.gather_registries(os.path.join(self.outdir, revision_dirname)) - - try: - self.get_singularity_images(current_revision=item[0]) - except OSError as e: - raise DownloadError(f"[red]{e}[/]") from e - - # Compress into an archive - if self.compress_type is not None: - log.info("Compressing output into archive") - self.compress_download() - - def download_workflow_platform(self, location=None): - """Create a bare-cloned git repository of the workflow, so it can be launched with `tw launch` as file:/ pipeline""" - - log.info("Collecting workflow from GitHub") - - self.workflow_repo = WorkflowRepo( - remote_url=f"https://github.com/{self.pipeline}.git", - revision=self.revision if self.revision else None, - commit=self.wf_sha.values() if bool(self.wf_sha) else None, - additional_tags=self.additional_tags, - location=(location if location else None), # manual location is required for the tests to work - in_cache=False, - ) - - # Remove tags for 
those revisions that had not been selected - self.workflow_repo.tidy_tags_and_branches() - - # create a bare clone of the modified repository needed for Seqera Platform - self.workflow_repo.bare_clone(os.path.join(self.outdir, self.output_filename)) - - # extract the required containers - if self.container_system == "singularity": - for revision, commit in self.wf_sha.items(): - # Checkout the repo in the current revision - self.workflow_repo.checkout(commit) - # Collect all required singularity images - self.find_container_images(self.workflow_repo.access()) - self.gather_registries(self.workflow_repo.access()) - - try: - self.get_singularity_images(current_revision=revision) - except OSError as e: - raise DownloadError(f"[red]{e}[/]") from e - - # Justify why compression is skipped for Seqera Platform downloads (Prompt is not shown, but CLI argument could have been set) - if self.compress_type is not None: - log.info( - "Compression choice is ignored for Seqera Platform downloads since nothing can be reasonably compressed." - ) - - def prompt_pipeline_name(self): - """Prompt for the pipeline name if not set with a flag""" - - if self.pipeline is None: - stderr.print("Specify the name of a nf-core pipeline or a GitHub repository name (user/repo).") - self.pipeline = nf_core.utils.prompt_remote_pipeline_name(self.wfs) - - def prompt_revision(self) -> None: - """ - Prompt for pipeline revision / branch - Prompt user for revision tag if '--revision' was not set - If --platform is specified, allow to select multiple revisions - Also the static download allows for multiple revisions, but - we do not prompt this option interactively. - """ - if not bool(self.revision): - (choice, tag_set) = nf_core.utils.prompt_pipeline_release_branch( - self.wf_revisions, self.wf_branches, multiple=self.platform - ) - """ - The checkbox() prompt unfortunately does not support passing a Validator, - so a user who keeps pressing Enter will flounder past the selection without choice. - - bool(choice), bool(tag_set): - ############################# - True, True: A choice was made and revisions were available. - False, True: No selection was made, but revisions were available -> defaults to all available. - False, False: No selection was made because no revisions were available -> raise AssertionError. - True, False: Congratulations, you found a bug! That combo shouldn't happen. - """ - - if bool(choice): - # have to make sure that self.revision is a list of strings, regardless if choice is str or list of strings. 
- (self.revision.append(choice) if isinstance(choice, str) else self.revision.extend(choice)) - else: - if bool(tag_set): - self.revision = tag_set - log.info("No particular revision was selected, all available will be downloaded.") - else: - raise AssertionError(f"No revisions of {self.pipeline} available for download.") - - def get_revision_hash(self): - """Find specified revision / branch / commit hash""" - - for revision in self.revision: # revision is a list of strings, but may be of length 1 - # Branch - if revision in self.wf_branches.keys(): - self.wf_sha = {**self.wf_sha, revision: self.wf_branches[revision]} - - else: - # Revision - for r in self.wf_revisions: - if r["tag_name"] == revision: - self.wf_sha = {**self.wf_sha, revision: r["tag_sha"]} - break - - else: - # Commit - full or short hash - if commit_id := nf_core.utils.get_repo_commit(self.pipeline, revision): - self.wf_sha = {**self.wf_sha, revision: commit_id} - continue - - # Can't find the revisions or branch - throw an error - log.info( - "Available {} revisions: '{}'".format( - self.pipeline, - "', '".join([r["tag_name"] for r in self.wf_revisions]), - ) - ) - log.info("Available {} branches: '{}'".format(self.pipeline, "', '".join(self.wf_branches.keys()))) - raise AssertionError( - f"Not able to find revision / branch / commit '{revision}' for {self.pipeline}" - ) - - # Set the outdir - if not self.outdir: - if len(self.wf_sha) > 1: - self.outdir = f"{self.pipeline.replace('/', '-').lower()}_{datetime.now().strftime('%Y-%m-%d_%H-%M')}" - else: - self.outdir = f"{self.pipeline.replace('/', '-').lower()}_{self.revision[0]}" - - if not self.platform: - for revision, wf_sha in self.wf_sha.items(): - # Set the download URL and return - only applicable for classic downloads - self.wf_download_url = { - **self.wf_download_url, - revision: f"https://github.com/{self.pipeline}/archive/{wf_sha}.zip", - } - - def prompt_config_inclusion(self): - """Prompt for inclusion of institutional configurations""" - if stderr.is_interactive: # Use rich auto-detection of interactive shells - self.include_configs = questionary.confirm( - "Include the nf-core's default institutional configuration files into the download?", - style=nf_core.utils.nfcore_question_style, - ).ask() - else: - self.include_configs = False - # do not include by default. - - def prompt_container_download(self): - """Prompt whether to download container images or not""" - - if self.container_system is None and stderr.is_interactive and not self.platform: - stderr.print("\nIn addition to the pipeline code, this tool can download software containers.") - self.container_system = questionary.select( - "Download software container images:", - choices=["none", "singularity"], - style=nf_core.utils.nfcore_question_style, - ).unsafe_ask() - - def prompt_singularity_cachedir_creation(self): - """Prompt about using $NXF_SINGULARITY_CACHEDIR if not already set""" - if ( - self.container_system == "singularity" - and os.environ.get("NXF_SINGULARITY_CACHEDIR") is None - and stderr.is_interactive # Use rich auto-detection of interactive shells - ): - stderr.print( - "\nNextflow and nf-core can use an environment variable called [blue]$NXF_SINGULARITY_CACHEDIR[/] that is a path to a directory where remote Singularity images are stored. " - "This allows downloaded images to be cached in a central location." 
- ) - if rich.prompt.Confirm.ask( - "[blue bold]?[/] [bold]Define [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] for a shared Singularity image download folder?[/]" - ): - if not self.container_cache_index: - self.container_cache_utilisation == "amend" # retain "remote" choice. - # Prompt user for a cache directory path - cachedir_path = None - while cachedir_path is None: - prompt_cachedir_path = questionary.path( - "Specify the path:", - only_directories=True, - style=nf_core.utils.nfcore_question_style, - ).unsafe_ask() - cachedir_path = os.path.abspath(os.path.expanduser(prompt_cachedir_path)) - if prompt_cachedir_path == "": - log.error("Not using [blue]$NXF_SINGULARITY_CACHEDIR[/]") - cachedir_path = False - elif not os.path.isdir(cachedir_path): - log.error(f"'{cachedir_path}' is not a directory.") - cachedir_path = None - if cachedir_path: - os.environ["NXF_SINGULARITY_CACHEDIR"] = cachedir_path - - """ - Optionally, create a permanent entry for the NXF_SINGULARITY_CACHEDIR in the terminal profile. - Currently support for bash and zsh. - ToDo: "sh", "dash", "ash","csh", "tcsh", "ksh", "fish", "cmd", "powershell", "pwsh"? - """ - - if os.getenv("SHELL", "") == "/bin/bash": - shellprofile_path = os.path.expanduser("~/~/.bash_profile") - if not os.path.isfile(shellprofile_path): - shellprofile_path = os.path.expanduser("~/.bashrc") - if not os.path.isfile(shellprofile_path): - shellprofile_path = False - elif os.getenv("SHELL", "") == "/bin/zsh": - shellprofile_path = os.path.expanduser("~/.zprofile") - if not os.path.isfile(shellprofile_path): - shellprofile_path = os.path.expanduser("~/.zshenv") - if not os.path.isfile(shellprofile_path): - shellprofile_path = False - else: - shellprofile_path = os.path.expanduser("~/.profile") - if not os.path.isfile(shellprofile_path): - shellprofile_path = False - - if shellprofile_path: - stderr.print( - f"\nSo that [blue]$NXF_SINGULARITY_CACHEDIR[/] is always defined, you can add it to your [blue not bold]~/{os.path.basename(shellprofile_path)}[/] file ." - "This will then be automatically set every time you open a new terminal. We can add the following line to this file for you: \n" - f'[blue]export NXF_SINGULARITY_CACHEDIR="{cachedir_path}"[/]' - ) - append_to_file = rich.prompt.Confirm.ask( - f"[blue bold]?[/] [bold]Add to [blue not bold]~/{os.path.basename(shellprofile_path)}[/] ?[/]" - ) - if append_to_file: - with open(os.path.expanduser(shellprofile_path), "a") as f: - f.write( - "\n\n#######################################\n" - f"## Added by `nf-core pipelines download` v{nf_core.__version__} ##\n" - + f'export NXF_SINGULARITY_CACHEDIR="{cachedir_path}"' - + "\n#######################################\n" - ) - log.info(f"Successfully wrote to [blue]{shellprofile_path}[/]") - log.warning( - "You will need reload your terminal after the download completes for this to take effect." - ) - - def prompt_singularity_cachedir_utilization(self): - """Ask if we should *only* use $NXF_SINGULARITY_CACHEDIR without copying into target""" - if ( - self.container_cache_utilisation is None # no choice regarding singularity cache has been made. - and self.container_system == "singularity" - and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None - and stderr.is_interactive - ): - stderr.print( - "\nIf you are working on the same system where you will run Nextflow, you can amend the downloaded images to the ones in the" - "[blue not bold]$NXF_SINGULARITY_CACHEDIR[/] folder, Nextflow will automatically find them. 
" - "However if you will transfer the downloaded files to a different system then they should be copied to the target folder." - ) - self.container_cache_utilisation = questionary.select( - "Copy singularity images from $NXF_SINGULARITY_CACHEDIR to the target folder or amend new images to the cache?", - choices=["amend", "copy"], - style=nf_core.utils.nfcore_question_style, - ).unsafe_ask() - - def prompt_singularity_cachedir_remote(self): - """Prompt about the index of a remote $NXF_SINGULARITY_CACHEDIR""" - if ( - self.container_system == "singularity" - and self.container_cache_utilisation == "remote" - and self.container_cache_index is None - and stderr.is_interactive # Use rich auto-detection of interactive shells - ): - # Prompt user for a file listing the contents of the remote cache directory - cachedir_index = None - while cachedir_index is None: - prompt_cachedir_index = questionary.path( - "Specify a list of the container images that are already present on the remote system:", - validate=SingularityCacheFilePathValidator, - style=nf_core.utils.nfcore_question_style, - ).unsafe_ask() - cachedir_index = os.path.abspath(os.path.expanduser(prompt_cachedir_index)) - if prompt_cachedir_index == "": - log.error("Will disregard contents of a remote [blue]$NXF_SINGULARITY_CACHEDIR[/]") - self.container_cache_index = None - self.container_cache_utilisation = "copy" - elif not os.access(cachedir_index, os.R_OK): - log.error(f"'{cachedir_index}' is not a readable file.") - cachedir_index = None - if cachedir_index: - self.container_cache_index = cachedir_index - # in any case read the remote containers, even if no prompt was shown. - self.read_remote_containers() - - def read_remote_containers(self): - """Reads the file specified as index for the remote Singularity cache dir""" - if ( - self.container_system == "singularity" - and self.container_cache_utilisation == "remote" - and self.container_cache_index is not None - ): - n_total_images = 0 - try: - with open(self.container_cache_index) as indexfile: - for line in indexfile.readlines(): - match = re.search(r"([^\/\\]+\.img)", line, re.S) - if match: - n_total_images += 1 - self.containers_remote.append(match.group(0)) - if n_total_images == 0: - raise LookupError("Could not find valid container names in the index file.") - self.containers_remote = sorted(list(set(self.containers_remote))) - except (FileNotFoundError, LookupError) as e: - log.error(f"[red]Issue with reading the specified remote $NXF_SINGULARITY_CACHE index:[/]\n{e}\n") - if stderr.is_interactive and rich.prompt.Confirm.ask("[blue]Specify a new index file and try again?"): - self.container_cache_index = None # reset chosen path to index file. - self.prompt_singularity_cachedir_remote() - else: - log.info("Proceeding without consideration of the remote $NXF_SINGULARITY_CACHE index.") - self.container_cache_index = None - if os.environ.get("NXF_SINGULARITY_CACHEDIR"): - self.container_cache_utilisation = "copy" # default to copy if possible, otherwise skip. - else: - self.container_cache_utilisation = None - - def prompt_compression_type(self): - """Ask user if we should compress the downloaded files""" - if self.compress_type is None: - stderr.print( - "\nIf transferring the downloaded files to another system, it can be convenient to have everything compressed in a single file." 
- ) - if self.container_system == "singularity": - stderr.print( - "[bold]This is [italic]not[/] recommended when downloading Singularity images, as it can take a long time and saves very little space." - ) - self.compress_type = questionary.select( - "Choose compression type:", - choices=[ - "none", - "tar.gz", - "tar.bz2", - "zip", - ], - style=nf_core.utils.nfcore_question_style, - ).unsafe_ask() - - # Correct type for no-compression - if self.compress_type == "none": - self.compress_type = None - - def download_wf_files(self, revision, wf_sha, download_url): - """Downloads workflow files from GitHub to the :attr:`self.outdir`.""" - log.debug(f"Downloading {download_url}") - - # Download GitHub zip file into memory and extract - url = requests.get(download_url) - with ZipFile(io.BytesIO(url.content)) as zipfile: - zipfile.extractall(self.outdir) - - # create a filesystem-safe version of the revision name for the directory - revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", revision) - # account for name collisions, if there is a branch / release named "configs" or "singularity-images" - if revision_dirname in ["configs", "singularity-images"]: - revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", self.pipeline + revision_dirname) - - # Rename the internal directory name to be more friendly - gh_name = f"{self.pipeline}-{wf_sha if bool(wf_sha) else ''}".split("/")[-1] - os.rename( - os.path.join(self.outdir, gh_name), - os.path.join(self.outdir, revision_dirname), - ) - - # Make downloaded files executable - for dirpath, _, filelist in os.walk(os.path.join(self.outdir, revision_dirname)): - for fname in filelist: - os.chmod(os.path.join(dirpath, fname), 0o775) - - return revision_dirname - - def download_configs(self): - """Downloads the centralised config profiles from nf-core/configs to :attr:`self.outdir`.""" - configs_zip_url = "https://github.com/nf-core/configs/archive/master.zip" - configs_local_dir = "configs-master" - log.debug(f"Downloading {configs_zip_url}") - - # Download GitHub zip file into memory and extract - url = requests.get(configs_zip_url) - with ZipFile(io.BytesIO(url.content)) as zipfile: - zipfile.extractall(self.outdir) - - # Rename the internal directory name to be more friendly - os.rename( - os.path.join(self.outdir, configs_local_dir), - os.path.join(self.outdir, "configs"), - ) - - # Make downloaded files executable - for dirpath, _, filelist in os.walk(os.path.join(self.outdir, "configs")): - for fname in filelist: - os.chmod(os.path.join(dirpath, fname), 0o775) - - def wf_use_local_configs(self, revision_dirname): - """Edit the downloaded nextflow.config file to use the local config files""" - nfconfig_fn = os.path.join(self.outdir, revision_dirname, "nextflow.config") - find_str = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - repl_str = "${projectDir}/../configs/" - log.debug(f"Editing 'params.custom_config_base' in '{nfconfig_fn}'") - - # Load the nextflow.config file into memory - with open(nfconfig_fn) as nfconfig_fh: - nfconfig = nfconfig_fh.read() - - # Replace the target string - log.debug(f"Replacing '{find_str}' with '{repl_str}'") - nfconfig = nfconfig.replace(find_str, repl_str) - - # Append the singularity.cacheDir to the end if we need it - if self.container_system == "singularity" and self.container_cache_utilisation == "copy": - nfconfig += ( - f"\n\n// Added by `nf-core pipelines download` v{nf_core.__version__} //\n" - + 'singularity.cacheDir = "${projectDir}/../singularity-images/"' - + 
"\n///////////////////////////////////////" - ) - - # Write the file out again - log.debug(f"Updating '{nfconfig_fn}'") - with open(nfconfig_fn, "w") as nfconfig_fh: - nfconfig_fh.write(nfconfig) - - def find_container_images(self, workflow_directory: str) -> None: - """Find container image names for workflow. - - Starts by using `nextflow config` to pull out any process.container - declarations. This works for DSL1. It should return a simple string with resolved logic, - but not always, e.g. not for differentialabundance 1.2.0 - - Second, we look for DSL2 containers. These can't be found with - `nextflow config` at the time of writing, so we scrape the pipeline files. - This returns raw matches that will likely need to be cleaned. - """ - - log.debug("Fetching container names for workflow") - # since this is run for multiple revisions now, account for previously detected containers. - previous_findings = [] if not self.containers else self.containers - config_findings = [] - module_findings = [] - - # Use linting code to parse the pipeline nextflow config - self.nf_config = nf_core.utils.fetch_wf_config(Path(workflow_directory)) - - # Find any config variables that look like a container - for k, v in self.nf_config.items(): - if (k.startswith("process.") or k.startswith("params.")) and k.endswith(".container"): - """ - Can be plain string / Docker URI or DSL2 syntax - - Since raw parsing is done by Nextflow, single quotes will be (partially) escaped in DSL2. - Use cleaning regex on DSL2. Same as for modules, except that (?(?(?:.(?!(?[\'\"]) The quote character is captured into the quote group \1. - The pattern (?:.(?!\1))*.? is used to match any character (.) not followed by the closing quote character (?!\1). - This capture happens greedy *, but we add a .? to ensure that we don't match the whole file until the last occurrence - of the closing quote character, but rather stop at the first occurrence. \1 inserts the matched quote character into the regex, either " or '. - It may be followed by whitespace or closing bracket [\\s}]* - re.DOTALL is used to account for the string to be spread out across multiple lines. - """ - container_regex = re.compile( - r"container\s+[\\s{}=$]*(?P[\'\"])(?P(?:.(?!\1))*.?)\1[\\s}]*", - re.DOTALL, - ) - - local_module_findings = re.findall(container_regex, search_space) - - # finding fill always be a tuple of length 2, first the quote used and second the enquoted value. - for finding in local_module_findings: - # append finding since we want to collect them from all modules - # also append search_space because we need to start over later if nothing was found. - module_findings.append(finding + (search_space, file_path)) - - # Not sure if there will ever be multiple container definitions per module, but beware DSL3. - # Like above run on shallow copy, because length may change at runtime. - module_findings = self.rectify_raw_container_matches(module_findings[:]) - - # Again clean list, in case config declares Docker URI but module or previous finding already had the http:// download - self.containers = self.prioritize_direct_download(previous_findings + config_findings + module_findings) - - def rectify_raw_container_matches(self, raw_findings): - """Helper function to rectify the raw extracted container matches into fully qualified container names. - If multiple containers are found, any prefixed with http for direct download is prioritized - - Example syntax: - - Early DSL2: - - .. 
code-block:: groovy - - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0" - } else { - container "quay.io/biocontainers/fastqc:0.11.9--0" - } - - Later DSL2: - - .. code-block:: groovy - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" - - Later DSL2, variable is being used: - - .. code-block:: groovy - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - "https://depot.galaxyproject.org/singularity/${container_id}" : - "quay.io/biocontainers/${container_id}" }" - - container_id = 'mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:afaaa4c6f5b308b4b6aa2dd8e99e1466b2a6b0cd-0' - - DSL1 / Special case DSL2: - - .. code-block:: groovy - - container "nfcore/cellranger:6.0.2" - - """ - cleaned_matches = [] - - # Thanks Stack Overflow for the regex: https://stackoverflow.com/a/3809435/713980 - url_regex = ( - r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)" - ) - oras_regex = r"oras:\/\/[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)" - # Thanks Stack Overflow for the regex: https://stackoverflow.com/a/39672069/713980 - docker_regex = r"^(?:(?=[^:\/]{1,253})(?!-)[a-zA-Z0-9-]{1,63}(?(?(?:.(?!(?(?(?:.(?!(? list[str]: - """ - Helper function that takes a list of container images (URLs and Docker URIs), - eliminates all Docker URIs for which also a URL is contained and returns the - cleaned and also deduplicated list. - - Conceptually, this works like so: - - Everything after the last Slash should be identical, e.g. "scanpy:1.7.2--pyhdfd78af_0" in - ['https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0', 'biocontainers/scanpy:1.7.2--pyhdfd78af_0'] - - - re.sub('.*/(.*)','\\1',c) will drop everything up to the last slash from c (container_id) - - d.get(k:=re.sub('.*/(.*)','\\1',c),'') assigns the truncated string to k (key) and gets the - corresponding value from the dict if present or else defaults to "". - - If the regex pattern matches, the original container_id will be assigned to the dict with the k key. - r"^$|(?!^http)" matches an empty string (we didn't have it in the dict yet and want to keep it in either case) or - any string that does not start with http. Because if our current dict value already starts with http, - we want to keep it and not replace with with whatever we have now (which might be the Docker URI). - - A regex that matches http, r"^$|^http" could thus be used to prioritize the Docker URIs over http Downloads - - We also need to handle a special case: The https:// Singularity downloads from Seqera Containers all end in 'data', although - they are not equivalent, e.g.: - - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data' - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data' - - Lastly, we want to remove at least a few Docker URIs for those modules, that have an oras:// download link. 
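# --- Editor's note: hedged illustration, not part of the original module. ---
# A minimal, standalone sketch of the deduplication idea described in the
# docstring above: key each container by everything after the last slash and
# only overwrite an entry if the value stored so far does not already start
# with "http", so direct-download URLs win over plain Docker URIs. (The real
# method additionally keeps Seqera '/data' URLs and oras:// URIs in separate
# lists before combining.)
import re


def prefer_direct_downloads(containers: list[str]) -> list[str]:
    picked: dict[str, str] = {}
    for c in containers:
        key = re.sub(r".*/(.*)", r"\1", c)  # e.g. "scanpy:1.7.2--pyhdfd78af_0"
        if not picked.get(key, "").startswith("http"):
            picked[key] = c  # keep an existing http(s) URL, otherwise take this one
    return sorted(set(picked.values()))


# prefer_direct_downloads([
#     "biocontainers/scanpy:1.7.2--pyhdfd78af_0",
#     "https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0",
# ])
# -> ["https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0"]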
- """ - d: dict[str, str] = {} - seqera_containers_http: list[str] = [] - seqera_containers_oras: list[str] = [] - all_others: list[str] = [] - - for c in container_list: - if bool(re.search(r"/data$", c)): - seqera_containers_http.append(c) - elif bool(re.search(r"^oras://", c)): - seqera_containers_oras.append(c) - else: - all_others.append(c) - - for c in all_others: - if re.match(r"^$|(?!^http)", d.get(k := re.sub(".*/(.*)", "\\1", c), "")): - log.debug(f"{c} matches and will be saved as {k}") - d[k] = c - - combined_with_oras = self.reconcile_seqera_container_uris(seqera_containers_oras, list(d.values())) - - # combine deduplicated others (Seqera containers oras, http others and Docker URI others) and Seqera containers http - return sorted(list(set(combined_with_oras + seqera_containers_http))) - - @staticmethod - def reconcile_seqera_container_uris(prioritized_container_list: list[str], other_list: list[str]) -> list[str]: - """ - Helper function that takes a list of Seqera container URIs, - extracts the software string and builds a regex from them to filter out - similar containers from the second container list. - - prioritzed_container_list = [ - ... "oras://community.wave.seqera.io/library/multiqc:1.25.1--f0e743d16869c0bf", - ... "oras://community.wave.seqera.io/library/multiqc_pip_multiqc-plugins:e1f4877f1515d03c" - ... ] - - will be cleaned to - - ['library/multiqc:1.25.1', 'library/multiqc_pip_multiqc-plugins'] - - Subsequently, build a regex from those and filter out matching duplicates in other_list: - """ - if not prioritized_container_list: - return other_list - else: - # trim the URIs to the stem that contains the tool string, assign with Walrus operator to account for non-matching patterns - trimmed_priority_list = [ - match.group() - for c in set(prioritized_container_list) - if (match := re.search(r"library/.*?:[\d.]+", c) if "--" in c else re.search(r"library/[^\s:]+", c)) - ] - - # build regex - prioritized_containers = re.compile("|".join(f"{re.escape(c)}" for c in trimmed_priority_list)) - - # filter out matches in other list - filtered_containers = [c for c in other_list if not re.search(prioritized_containers, c)] - - # combine prioritized and regular container lists - return sorted(list(set(prioritized_container_list + filtered_containers))) - - def gather_registries(self, workflow_directory: str) -> None: - """Fetch the registries from the pipeline config and CLI arguments and store them in a set. - This is needed to symlink downloaded container images so Nextflow will find them. 
- """ - - # should exist, because find_container_images() is always called before - if not self.nf_config: - self.nf_config = nf_core.utils.fetch_wf_config(Path(workflow_directory)) - - # Select registries defined in pipeline config - configured_registries = [ - "apptainer.registry", - "docker.registry", - "podman.registry", - "singularity.registry", - ] - - for registry in configured_registries: - if registry in self.nf_config: - self.registry_set.add(self.nf_config[registry]) - - # add depot.galaxyproject.org to the set, because it is the default registry for singularity hardcoded in modules - self.registry_set.add("depot.galaxyproject.org/singularity") - - # add community.wave.seqera.io/library to the set to support the new Seqera Docker container registry - self.registry_set.add("community.wave.seqera.io/library") - - # add chttps://community-cr-prod.seqera.io/docker/registry/v2/ to the set to support the new Seqera Singularity container registry - self.registry_set.add("community-cr-prod.seqera.io/docker/registry/v2") - - def symlink_singularity_images(self, image_out_path: str) -> None: - """Create a symlink for each registry in the registry set that points to the image. - We have dropped the explicit registries from the modules in favor of the configurable registries. - Unfortunately, Nextflow still expects the registry to be part of the file name, so a symlink is needed. - - The base image, e.g. ./nf-core-gatk-4.4.0.0.img will thus be symlinked as for example ./quay.io-nf-core-gatk-4.4.0.0.img - by prepending all registries in self.registry_set to the image name. - - Unfortunately, out output image name may contain a registry definition (Singularity image pulled from depot.galaxyproject.org - or older pipeline version, where the docker registry was part of the image name in the modules). Hence, it must be stripped - before to ensure that it is really the base name. - """ - - if self.registry_set: - # Create a regex pattern from the set, in case trimming is needed. - trim_pattern = "|".join(f"^{re.escape(registry)}-?".replace("/", "[/-]") for registry in self.registry_set) - - for registry in self.registry_set: - # Nextflow will convert it like this as well, so we need it mimic its behavior - registry = registry.replace("/", "-") - - if not bool(re.search(trim_pattern, os.path.basename(image_out_path))): - symlink_name = os.path.join("./", f"{registry}-{os.path.basename(image_out_path)}") - else: - trimmed_name = re.sub(f"{trim_pattern}", "", os.path.basename(image_out_path)) - symlink_name = os.path.join("./", f"{registry}-{trimmed_name}") - - symlink_full = os.path.join(os.path.dirname(image_out_path), symlink_name) - target_name = os.path.join("./", os.path.basename(image_out_path)) - - if not os.path.exists(symlink_full) and target_name != symlink_name: - os.makedirs(os.path.dirname(symlink_full), exist_ok=True) - image_dir = os.open(os.path.dirname(image_out_path), os.O_RDONLY) - try: - os.symlink( - target_name, - symlink_name, - dir_fd=image_dir, - ) - log.debug(f"Symlinked {target_name} as {symlink_name}.") - finally: - os.close(image_dir) - - def get_singularity_images(self, current_revision: str = "") -> None: - """Loop through container names and download Singularity images""" - - if len(self.containers) == 0: - log.info("No container names found in workflow") - else: - log.info( - f"Processing workflow revision {current_revision}, found {len(self.containers)} container image{'s' if len(self.containers) > 1 else ''} in total." 
- ) - - with DownloadProgress() as progress: - task = progress.add_task( - "Collecting container images", - total=len(self.containers), - progress_type="summary", - ) - - # Organise containers based on what we need to do with them - containers_exist: list[str] = [] - containers_cache: list[tuple[str, str, Optional[str]]] = [] - containers_download: list[tuple[str, str, Optional[str]]] = [] - containers_pull: list[tuple[str, str, Optional[str]]] = [] - for container in self.containers: - # Fetch the output and cached filenames for this container - out_path, cache_path = self.singularity_image_filenames(container) - - # Check that the directories exist - out_path_dir = os.path.dirname(out_path) - if not os.path.isdir(out_path_dir): - log.debug(f"Output directory not found, creating: {out_path_dir}") - os.makedirs(out_path_dir) - if cache_path: - cache_path_dir = os.path.dirname(cache_path) - if not os.path.isdir(cache_path_dir): - log.debug(f"Cache directory not found, creating: {cache_path_dir}") - os.makedirs(cache_path_dir) - - # We already have the target file in place or in remote cache, return - if os.path.exists(out_path) or os.path.basename(out_path) in self.containers_remote: - containers_exist.append(container) - continue - - # We have a copy of this in the NXF_SINGULARITY_CACHE dir - if cache_path and os.path.exists(cache_path): - containers_cache.append((container, out_path, cache_path)) - continue - - # Direct download within Python - if container.startswith("http"): - containers_download.append((container, out_path, cache_path)) - continue - - # Pull using singularity - containers_pull.append((container, out_path, cache_path)) - - # Exit if we need to pull images and Singularity is not installed - if len(containers_pull) > 0: - if not (shutil.which("singularity") or shutil.which("apptainer")): - raise OSError( - "Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH" - ) - - if containers_exist: - if self.container_cache_index is not None: - log.info( - f"{len(containers_exist)} containers are already cached remotely and won't be retrieved." - ) - # Go through each method of fetching containers in order - for container in containers_exist: - progress.update(task, description="Image file exists at destination") - progress.update(task, advance=1) - - if containers_cache: - for container in containers_cache: - progress.update(task, description="Copying singularity images from cache") - self.singularity_copy_cache_image(*container) - progress.update(task, advance=1) - - if containers_download or containers_pull: - # if clause gives slightly better UX, because Download is no longer displayed if nothing is left to be downloaded. 
- with concurrent.futures.ThreadPoolExecutor(max_workers=self.parallel_downloads) as pool: - progress.update(task, description="Downloading singularity images") - - # Kick off concurrent downloads - future_downloads = [ - pool.submit(self.singularity_download_image, *containers, progress) - for containers in containers_download - ] - - # Make ctrl-c work with multi-threading - self.kill_with_fire = False - - try: - # Iterate over each threaded download, waiting for them to finish - for future in concurrent.futures.as_completed(future_downloads): - future.result() - try: - progress.update(task, advance=1) - except Exception as e: - log.error(f"Error updating progress bar: {e}") - - except KeyboardInterrupt: - # Cancel the future threads that haven't started yet - for future in future_downloads: - future.cancel() - # Set the variable that the threaded function looks for - # Will trigger an exception from each thread - self.kill_with_fire = True - # Re-raise exception on the main thread - raise - - for containers in containers_pull: - progress.update(task, description="Pulling singularity images") - # it is possible to try multiple registries / mirrors if multiple were specified. - # Iteration happens over a copy of self.container_library[:], as I want to be able to remove failing registries for subsequent images. - for library in self.container_library[:]: - try: - self.singularity_pull_image(*containers, library, progress) - # Pulling the image was successful, no ContainerError was raised, break the library loop - break - except ContainerError.ImageExistsError: - # Pulling not required - break - except ContainerError.RegistryNotFoundError as e: - self.container_library.remove(library) - # The only library was removed - if not self.container_library: - log.error(e.message) - log.error(e.helpmessage) - raise OSError from e - else: - # Other libraries can be used - continue - except ContainerError.ImageNotFoundError as e: - # Try other registries - if e.error_log.absolute_URI: - break # there no point in trying other registries if absolute URI was specified. - else: - continue - except ContainerError.InvalidTagError: - # Try other registries - continue - except ContainerError.OtherError as e: - # Try other registries - log.error(e.message) - log.error(e.helpmessage) - if e.error_log.absolute_URI: - break # there no point in trying other registries if absolute URI was specified. - else: - continue - else: - # The else clause executes after the loop completes normally. - # This means the library loop completed without breaking, indicating failure for all libraries (registries) - log.error( - f"Not able to pull image of {containers}. Service might be down or internet connection is dead." - ) - # Task should advance in any case. Failure to pull will not kill the download process. - progress.update(task, advance=1) - - def singularity_image_filenames(self, container: str) -> tuple[str, Optional[str]]: - """Check Singularity cache for image, copy to destination folder if found. - - Args: - container (str): A pipeline's container name. Can be direct download URL - or a Docker Hub repository ID. - - Returns: - tuple (str, str): Returns a tuple of (out_path, cache_path). - out_path is the final target output path. it may point to the NXF_SINGULARITY_CACHEDIR, if cache utilisation was set to 'amend'. - If cache utilisation was set to 'copy', it will point to the target folder, a subdirectory of the output directory. 
In the latter case, - cache_path may either be None (image is not yet cached locally) or point to the image in the NXF_SINGULARITY_CACHEDIR, so it will not be - downloaded from the web again, but directly copied from there. See get_singularity_images() for implementation. - """ - - # Generate file paths - # Based on simpleName() function in Nextflow code: - # https://github.com/nextflow-io/nextflow/blob/671ae6d85df44f906747c16f6d73208dbc402d49/modules/nextflow/src/main/groovy/nextflow/container/SingularityCache.groovy#L69-L94 - out_name = container - # Strip URI prefix - out_name = re.sub(r"^.*:\/\/", "", out_name) - # Detect file extension - extension = ".img" - if ".sif:" in out_name: - extension = ".sif" - out_name = out_name.replace(".sif:", "-") - elif out_name.endswith(".sif"): - extension = ".sif" - out_name = out_name[:-4] - # Strip : and / characters - out_name = out_name.replace("/", "-").replace(":", "-") - # Add file extension - out_name = out_name + extension - - # Trim potential registries from the name for consistency. - # This will allow pipelines to work offline without symlinked images, - # if docker.registry / singularity.registry are set to empty strings at runtime, which can be included in the HPC config profiles easily. - if self.registry_set: - # Create a regex pattern from the set of registries - trim_pattern = "|".join(f"^{re.escape(registry)}-?".replace("/", "[/-]") for registry in self.registry_set) - # Use the pattern to trim the string - out_name = re.sub(f"{trim_pattern}", "", out_name) - - # Full destination and cache paths - out_path = os.path.abspath(os.path.join(self.outdir, "singularity-images", out_name)) - cache_path = None - if os.environ.get("NXF_SINGULARITY_CACHEDIR"): - cache_path = os.path.join(os.environ["NXF_SINGULARITY_CACHEDIR"], out_name) - # Use only the cache - set this as the main output path - if self.container_cache_utilisation == "amend": - out_path = cache_path - cache_path = None - elif self.container_cache_utilisation in ["amend", "copy"]: - raise FileNotFoundError("Singularity cache is required but no '$NXF_SINGULARITY_CACHEDIR' set!") - - return (out_path, cache_path) - - def singularity_copy_cache_image(self, container: str, out_path: str, cache_path: Optional[str]) -> None: - """Copy Singularity image from NXF_SINGULARITY_CACHEDIR to target folder.""" - # Copy to destination folder if we have a cached version - if cache_path and os.path.exists(cache_path): - log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'") - shutil.copyfile(cache_path, out_path) - # Create symlinks to ensure that the images are found even with different registries being used. - self.symlink_singularity_images(out_path) - - def singularity_download_image( - self, container: str, out_path: str, cache_path: Optional[str], progress: DownloadProgress - ) -> None: - """Download a singularity image from the web. - - Use native Python to download the file. - - Args: - container (str): A pipeline's container name. Usually it is of similar format - to ``https://depot.galaxyproject.org/singularity/name:version`` - out_path (str): The final target output path - cache_path (str, None): The NXF_SINGULARITY_CACHEDIR path if set, None if not - progress (Progress): Rich progress bar instance to add tasks to. 
- """ - log.debug(f"Downloading Singularity image: '{container}'") - - # Set output path to save file to - output_path = cache_path or out_path - output_path_tmp = f"{output_path}.partial" - log.debug(f"Downloading to: '{output_path_tmp}'") - - # Set up progress bar - nice_name = container.split("/")[-1][:50] - task = progress.add_task(nice_name, start=False, total=False, progress_type="download") - try: - # Delete temporary file if it already exists - if os.path.exists(output_path_tmp): - os.remove(output_path_tmp) - - # Open file handle and download - with open(output_path_tmp, "wb") as fh: - # Disable caching as this breaks streamed downloads - with requests_cache.disabled(): - r = requests.get(container, allow_redirects=True, stream=True, timeout=60 * 5) - filesize = r.headers.get("Content-length") - if filesize: - progress.update(task, total=int(filesize)) - progress.start_task(task) - - # Stream download - for data in r.iter_content(chunk_size=io.DEFAULT_BUFFER_SIZE): - # Check that the user didn't hit ctrl-c - if self.kill_with_fire: - raise KeyboardInterrupt - progress.update(task, advance=len(data)) - fh.write(data) - - # Rename partial filename to final filename - os.rename(output_path_tmp, output_path) - - # Copy cached download if we are using the cache - if cache_path: - log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'") - progress.update(task, description="Copying from cache to target directory") - shutil.copyfile(cache_path, out_path) - self.symlink_singularity_images(cache_path) # symlinks inside the cache directory - - # Create symlinks to ensure that the images are found even with different registries being used. - self.symlink_singularity_images(out_path) - - progress.remove_task(task) - - except: - # Kill the progress bars - for t in progress.task_ids: - progress.remove_task(t) - # Try to delete the incomplete download - log.debug(f"Deleting incompleted singularity image download:\n'{output_path_tmp}'") - if output_path_tmp and os.path.exists(output_path_tmp): - os.remove(output_path_tmp) - if output_path and os.path.exists(output_path): - os.remove(output_path) - # Re-raise the caught exception - raise - finally: - del output_path_tmp - - def singularity_pull_image( - self, container: str, out_path: str, cache_path: Optional[str], library: list[str], progress: DownloadProgress - ) -> None: - """Pull a singularity image using ``singularity pull`` - - Attempt to use a local installation of singularity to pull the image. - - Args: - container (str): A pipeline's container name. Usually it is of similar format - to ``nfcore/name:version``. - library (list of str): A list of libraries to try for pulling the image. - - Raises: - Various exceptions possible from `subprocess` execution of Singularity. - """ - output_path = cache_path or out_path - - # where the output of 'singularity pull' is first generated before being copied to the NXF_SINGULARITY_CACHDIR. - # if not defined by the Singularity administrators, then use the temporary directory to avoid storing the images in the work directory. - if os.environ.get("SINGULARITY_CACHEDIR") is None: - os.environ["SINGULARITY_CACHEDIR"] = str(NFCORE_CACHE_DIR) - - # Sometimes, container still contain an explicit library specification, which - # resulted in attempted pulls e.g. from docker://quay.io/quay.io/qiime2/core:2022.11 - # Thus, if an explicit registry is specified, the provided -l value is ignored. - # Additionally, check if the container to be pulled is native Singularity: oras:// protocol. 
- container_parts = container.split("/") - if len(container_parts) > 2: - address = container if container.startswith("oras://") else f"docker://{container}" - absolute_URI = True - else: - address = f"docker://{library}/{container.replace('docker://', '')}" - absolute_URI = False - - if shutil.which("singularity"): - singularity_command = [ - "singularity", - "pull", - "--name", - output_path, - address, - ] - elif shutil.which("apptainer"): - singularity_command = ["apptainer", "pull", "--name", output_path, address] - else: - raise OSError("Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH") - log.debug(f"Building singularity image: {address}") - log.debug(f"Singularity command: {' '.join(singularity_command)}") - - # Progress bar to show that something is happening - task = progress.add_task( - container, - start=False, - total=False, - progress_type="singularity_pull", - current_log="", - ) - - # Run the singularity pull command - with subprocess.Popen( - singularity_command, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - universal_newlines=True, - bufsize=1, - ) as proc: - lines = [] - if proc.stdout is not None: - for line in proc.stdout: - lines.append(line) - progress.update(task, current_log=line.strip()) - - if lines: - # something went wrong with the container retrieval - if any("FATAL: " in line for line in lines): - progress.remove_task(task) - raise ContainerError( - container=container, - registry=library, - address=address, - absolute_URI=absolute_URI, - out_path=out_path if out_path else cache_path or "", - singularity_command=singularity_command, - error_msg=lines, - ) - - # Copy cached download if we are using the cache - if cache_path: - log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'") - progress.update(task, current_log="Copying from cache to target directory") - shutil.copyfile(cache_path, out_path) - self.symlink_singularity_images(cache_path) # symlinks inside the cache directory - - # Create symlinks to ensure that the images are found even with different registries being used. 
- self.symlink_singularity_images(out_path) - - progress.remove_task(task) - - def compress_download(self): - """Take the downloaded files and make a compressed .tar.gz archive.""" - log.debug(f"Creating archive: {self.output_filename}") - - # .tar.gz and .tar.bz2 files - if self.compress_type in ["tar.gz", "tar.bz2"]: - ctype = self.compress_type.split(".")[1] - with tarfile.open(self.output_filename, f"w:{ctype}") as tar: - tar.add(self.outdir, arcname=os.path.basename(self.outdir)) - tar_flags = "xzf" if ctype == "gz" else "xjf" - log.info(f"Command to extract files: [bright_magenta]tar -{tar_flags} {self.output_filename}[/]") - - # .zip files - if self.compress_type == "zip": - with ZipFile(self.output_filename, "w") as zip_file: - # Iterate over all the files in directory - for folder_name, _, filenames in os.walk(self.outdir): - for filename in filenames: - # create complete filepath of file in directory - file_path = os.path.join(folder_name, filename) - # Add file to zip - zip_file.write(file_path) - log.info(f"Command to extract files: [bright_magenta]unzip {self.output_filename}[/]") - - # Delete original files - log.debug(f"Deleting uncompressed files: '{self.outdir}'") - shutil.rmtree(self.outdir) - - # Calculate md5sum for output file - log.info(f"MD5 checksum for '{self.output_filename}': [blue]{nf_core.utils.file_md5(self.output_filename)}[/]") - - -class WorkflowRepo(SyncedRepo): - """ - An object to store details about a locally cached workflow repository. - - Important Attributes: - fullname: The full name of the repository, ``nf-core/{self.pipelinename}``. - local_repo_dir (str): The local directory, where the workflow is cloned into. Defaults to ``$HOME/.cache/nf-core/nf-core/{self.pipeline}``. - - """ - - def __init__( - self, - remote_url, - revision, - commit, - additional_tags, - location=None, - hide_progress=False, - in_cache=True, - ): - """ - Initializes the object and clones the workflows git repository if it is not already present - - Args: - remote_url (str): The URL of the remote repository. Defaults to None. - self.revision (list of str): The revisions to include. A list of strings. - commits (dict of str): The checksums to linked with the revisions. - no_pull (bool, optional): Whether to skip the pull step. Defaults to False. - hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. - in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. 
- """ - self.remote_url = remote_url - if isinstance(revision, str): - self.revision = [revision] - elif isinstance(revision, list): - self.revision = [*revision] - else: - self.revision = [] - if isinstance(commit, str): - self.commit = [commit] - elif isinstance(commit, list): - self.commit = [*commit] - else: - self.commit = [] - self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) - self.retries = 0 # retries for setting up the locally cached repository - self.hide_progress = hide_progress - - self.setup_local_repo(remote=remote_url, location=location, in_cache=in_cache) - - # additional tags to be added to the repository - self.additional_tags = additional_tags if additional_tags else None - - def __repr__(self): - """Called by print, creates representation of object""" - return f"" - - @property - def heads(self): - return self.repo.heads - - @property - def tags(self): - return self.repo.tags - - def access(self): - if os.path.exists(self.local_repo_dir): - return self.local_repo_dir - else: - return None - - def checkout(self, commit): - return super().checkout(commit) - - def get_remote_branches(self, remote_url): - return super().get_remote_branches(remote_url) - - def retry_setup_local_repo(self, skip_confirm=False): - self.retries += 1 - if skip_confirm or rich.prompt.Confirm.ask( - f"[violet]Delete local cache '{self.local_repo_dir}' and try again?" - ): - if ( - self.retries > 1 - ): # One unconfirmed retry is acceptable, but prevent infinite loops without user interaction. - raise DownloadError( - f"Errors with locally cached repository of '{self.fullname}'. Please delete '{self.local_repo_dir}' manually and try again." - ) - if not skip_confirm: # Feedback to user for manual confirmation. - log.info(f"Removing '{self.local_repo_dir}'") - shutil.rmtree(self.local_repo_dir) - self.setup_local_repo(self.remote_url, in_cache=False) - else: - raise DownloadError("Exiting due to error with locally cached Git repository.") - - def setup_local_repo(self, remote, location=None, in_cache=True): - """ - Sets up the local git repository. If the repository has been cloned previously, it - returns a git.Repo object of that clone. Otherwise it tries to clone the repository from - the provided remote URL and returns a git.Repo of the new clone. - - Args: - remote (str): git url of remote - location (Path): location where the clone should be created/cached. - in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. 
- Sets self.repo - """ - if location: - self.local_repo_dir = os.path.join(location, self.fullname) - else: - self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) - - try: - if not os.path.exists(self.local_repo_dir): - try: - pbar = rich.progress.Progress( - "[bold blue]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[bold yellow]{task.fields[state]}", - transient=True, - disable=os.environ.get("HIDE_PROGRESS", None) is not None or self.hide_progress, - ) - with pbar: - self.repo = git.Repo.clone_from( - remote, - self.local_repo_dir, - progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), - ) - super().update_local_repo_status(self.fullname, True) - except GitCommandError: - raise DownloadError(f"Failed to clone from the remote: `{remote}`") - else: - self.repo = git.Repo(self.local_repo_dir) - - if super().no_pull_global: - super().update_local_repo_status(self.fullname, True) - # If the repo is already cloned, fetch the latest changes from the remote - if not super().local_repo_synced(self.fullname): - pbar = rich.progress.Progress( - "[bold blue]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[bold yellow]{task.fields[state]}", - transient=True, - disable=os.environ.get("HIDE_PROGRESS", None) is not None or self.hide_progress, - ) - with pbar: - self.repo.remotes.origin.fetch( - progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") - ) - super().update_local_repo_status(self.fullname, True) - - except (GitCommandError, InvalidGitRepositoryError) as e: - log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") - self.retry_setup_local_repo() - - def tidy_tags_and_branches(self): - """ - Function to delete all tags and branches that are not of interest to the downloader. - This allows a clutter-free experience in Seqera Platform. The untagged commits are evidently still available. - - However, due to local caching, the downloader might also want access to revisions that had been deleted before. - In that case, don't bother with re-adding the tags and rather download anew from Github. - """ - if self.revision and self.repo and self.repo.tags: - # create a set to keep track of the revisions to process & check - desired_revisions = set(self.revision) - - # determine what needs pruning - tags_to_remove = {tag for tag in self.repo.tags if tag.name not in desired_revisions.union({"latest"})} - heads_to_remove = {head for head in self.repo.heads if head.name not in desired_revisions.union({"latest"})} - - try: - # delete unwanted tags from repository - for tag in tags_to_remove: - self.repo.delete_tag(tag) - - # switch to a revision that should be kept, because deleting heads fails, if they are checked out (e.g. "main") - self.checkout(self.revision[0]) - - # delete unwanted heads/branches from repository - for head in heads_to_remove: - self.repo.delete_head(head) - - # ensure all desired revisions/branches are available - for revision in desired_revisions: - if not self.repo.is_valid_object(revision): - self.checkout(revision) - self.repo.create_head(revision, revision) - if self.repo.head.is_detached: - self.repo.head.reset(index=True, working_tree=True) - - # no branch exists, but one is required for Seqera Platform's UI to display revisions correctly). Thus, "latest" will be created. 
- if not bool(self.repo.heads): - if self.repo.is_valid_object("latest"): - # "latest" exists as tag but not as branch - self.repo.create_head("latest", "latest") # create a new head for latest - self.checkout("latest") - else: - # desired revisions may contain arbitrary branch names that do not correspond to valid semantic versioning patterns. - valid_versions = [ - Version(v) for v in desired_revisions if re.match(r"\d+\.\d+(?:\.\d+)*(?:[\w\-_])*", v) - ] - # valid versions sorted in ascending order, last will be aliased as "latest". - latest = sorted(valid_versions)[-1] - self.repo.create_head("latest", str(latest)) - self.checkout(latest) - if self.repo.head.is_detached: - self.repo.head.reset(index=True, working_tree=True) - - # Apply the custom additional tags to the repository - self.__add_additional_tags() - - # get all tags and available remote_branches - completed_revisions = {revision.name for revision in self.repo.heads + self.repo.tags} - - # verify that all requested revisions are available. - # a local cache might lack revisions that were deleted during a less comprehensive previous download. - if bool(desired_revisions - completed_revisions): - log.info( - f"Locally cached version of the pipeline lacks selected revisions {', '.join(desired_revisions - completed_revisions)}. Downloading anew from GitHub..." - ) - self.retry_setup_local_repo(skip_confirm=True) - self.tidy_tags_and_branches() - except (GitCommandError, InvalidGitRepositoryError) as e: - log.error(f"[red]Adapting your pipeline download unfortunately failed:[/]\n{e}\n") - self.retry_setup_local_repo(skip_confirm=True) - raise DownloadError(e) from e - - # "Private" method to add the additional custom tags to the repository. - def __add_additional_tags(self) -> None: - if self.additional_tags: - # example.com is reserved by the Internet Assigned Numbers Authority (IANA) as special-use domain names for documentation purposes. - # Although "dev-null" is a syntactically-valid local-part that is equally valid for delivery, - # and only the receiving MTA can decide whether to accept it, it is to my best knowledge configured with - # a Postfix discard mail delivery agent (https://www.postfix.org/discard.8.html), so incoming mails should be sinkholed. - self.ensure_git_user_config(f"nf-core pipelines download v{nf_core.__version__}", "dev-null@example.com") - - for additional_tag in self.additional_tags: - # A valid git branch or tag name can contain alphanumeric characters, underscores, hyphens, and dots. - # But it must not start with a dot, hyphen or underscore and also cannot contain two consecutive dots. - if re.match(r"^\w[\w_.-]+={1}\w[\w_.-]+$", additional_tag) and ".." not in additional_tag: - anchor, tag = additional_tag.split("=") - if self.repo.is_valid_object(anchor) and not self.repo.is_valid_object(tag): - try: - self.repo.create_tag( - tag, - ref=anchor, - message=f"Synonynmous tag to {anchor}; added by `nf-core pipelines download`.", - ) - except (GitCommandError, InvalidGitRepositoryError) as e: - log.error(f"[red]Additional tag(s) could not be applied:[/]\n{e}\n") - else: - if not self.repo.is_valid_object(anchor): - log.error( - f"[red]Adding tag '{tag}' to '{anchor}' failed.[/]\n Mind that '{anchor}' must be a valid git reference that resolves to a commit." - ) - if self.repo.is_valid_object(tag): - log.error( - f"[red]Adding tag '{tag}' to '{anchor}' failed.[/]\n Mind that '{tag}' must not exist hitherto." 
- ) - else: - log.error(f"[red]Could not apply invalid `--tag` specification[/]: '{additional_tag}'") - - def bare_clone(self, destination): - if self.repo: - try: - destfolder = os.path.abspath(destination) - if not os.path.exists(destfolder): - os.makedirs(destfolder) - if os.path.exists(destination): - shutil.rmtree(os.path.abspath(destination)) - self.repo.clone(os.path.abspath(destination), bare=True) - except (OSError, GitCommandError, InvalidGitRepositoryError) as e: - log.error(f"[red]Failure to create the pipeline download[/]\n{e}\n") - - -# Distinct errors for the container download, required for acting on the exceptions - - -class ContainerError(Exception): - """A class of errors related to pulling containers with Singularity/Apptainer""" - - def __init__( - self, - container, - registry, - address, - absolute_URI, - out_path, - singularity_command, - error_msg, - ): - self.container = container - self.registry = registry - self.address = address - self.absolute_URI = absolute_URI - self.out_path = out_path - self.singularity_command = singularity_command - self.error_msg = error_msg - - for line in error_msg: - if re.search(r"dial\stcp.*no\ssuch\shost", line): - self.error_type = self.RegistryNotFoundError(self) - break - elif ( - re.search(r"requested\saccess\sto\sthe\sresource\sis\sdenied", line) - or re.search(r"StatusCode:\s404", line) - or re.search(r"400|Bad\s?Request", line) - or re.search(r"invalid\sstatus\scode\sfrom\sregistry\s400", line) - ): - # Unfortunately, every registry seems to return an individual error here: - # Docker.io: denied: requested access to the resource is denied - # unauthorized: authentication required - # Quay.io: StatusCode: 404, \n'] - # ghcr.io: Requesting bearer token: invalid status code from registry 400 (Bad Request) - self.error_type = self.ImageNotFoundError(self) - break - elif re.search(r"manifest\sunknown", line): - self.error_type = self.InvalidTagError(self) - break - elif re.search(r"ORAS\sSIF\simage\sshould\shave\sa\ssingle\slayer", line): - self.error_type = self.NoSingularityContainerError(self) - break - elif re.search(r"Image\sfile\salready\sexists", line): - self.error_type = self.ImageExistsError(self) - break - else: - continue - else: - self.error_type = self.OtherError(self) - - log.error(self.error_type.message) - log.info(self.error_type.helpmessage) - log.debug(f"Failed command:\n{' '.join(singularity_command)}") - log.debug(f"Singularity error messages:\n{''.join(error_msg)}") - - raise self.error_type - - class RegistryNotFoundError(ConnectionRefusedError): - """The specified registry does not resolve to a valid IP address""" - - def __init__(self, error_log): - self.error_log = error_log - self.message = ( - f'[bold red]The specified container library "{self.error_log.registry}" is invalid or unreachable.[/]\n' - ) - self.helpmessage = ( - f'Please check, if you made a typo when providing "-l / --library {self.error_log.registry}"\n' - ) - super().__init__(self.message, self.helpmessage, self.error_log) - - class ImageNotFoundError(FileNotFoundError): - """The image can not be found in the registry""" - - def __init__(self, error_log): - self.error_log = error_log - if not self.error_log.absolute_URI: - self.message = ( - f'[bold red]"Pulling "{self.error_log.container}" from "{self.error_log.address}" failed.[/]\n' - ) - self.helpmessage = f'Saving image of "{self.error_log.container}" failed.\nPlease troubleshoot the command \n"{" ".join(self.error_log.singularity_command)}" manually.f\n' - else: - self.message = 
f'[bold red]"The pipeline requested the download of non-existing container image "{self.error_log.address}"[/]\n' - self.helpmessage = f'Please try to rerun \n"{" ".join(self.error_log.singularity_command)}" manually with a different registry.f\n' - - super().__init__(self.message) - - class InvalidTagError(AttributeError): - """Image and registry are valid, but the (version) tag is not""" - - def __init__(self, error_log): - self.error_log = error_log - self.message = f'[bold red]"{self.error_log.address.split(":")[-1]}" is not a valid tag of "{self.error_log.container}"[/]\n' - self.helpmessage = f'Please chose a different library than {self.error_log.registry}\nor try to locate the "{self.error_log.address.split(":")[-1]}" version of "{self.error_log.container}" manually.\nPlease troubleshoot the command \n"{" ".join(self.error_log.singularity_command)}" manually.\n' - super().__init__(self.message) - - class ImageExistsError(FileExistsError): - """Image already exists in cache/output directory.""" - - def __init__(self, error_log): - self.error_log = error_log - self.message = ( - f'[bold red]"{self.error_log.container}" already exists at destination and cannot be pulled[/]\n' - ) - self.helpmessage = f'Saving image of "{self.error_log.container}" failed, because "{self.error_log.out_path}" exists.\nPlease troubleshoot the command \n"{" ".join(self.error_log.singularity_command)}" manually.\n' - super().__init__(self.message) - - class NoSingularityContainerError(RuntimeError): - """The container image is no native Singularity Image Format.""" - - def __init__(self, error_log): - self.error_log = error_log - self.message = ( - f'[bold red]"{self.error_log.container}" is no valid Singularity Image Format container.[/]\n' - ) - self.helpmessage = f"Pulling \"{self.error_log.container}\" failed, because it appears invalid. 
To convert from Docker's OCI format, prefix the URI with 'docker://' instead of 'oras://'.\n" - super().__init__(self.message) - - class OtherError(RuntimeError): - """Undefined error with the container""" - - def __init__(self, error_log): - self.error_log = error_log - if not self.error_log.absolute_URI: - self.message = f'[bold red]"{self.error_log.container}" failed for unclear reasons.[/]\n' - self.helpmessage = f'Pulling of "{self.error_log.container}" failed.\nPlease troubleshoot the command \n"{" ".join(self.error_log.singularity_command)}" manually.\n' - else: - self.message = f'[bold red]"The pipeline requested the download of non-existing container image "{self.error_log.address}"[/]\n' - self.helpmessage = f'Please try to rerun \n"{" ".join(self.error_log.singularity_command)}" manually with a different registry.f\n' - - super().__init__(self.message, self.helpmessage, self.error_log) diff --git a/nf_core/pipelines/download/__init__.py b/nf_core/pipelines/download/__init__.py new file mode 100644 index 0000000000..7df3cf312d --- /dev/null +++ b/nf_core/pipelines/download/__init__.py @@ -0,0 +1 @@ +from .download import DownloadWorkflow diff --git a/nf_core/pipelines/download/container_fetcher.py b/nf_core/pipelines/download/container_fetcher.py new file mode 100644 index 0000000000..748404cc97 --- /dev/null +++ b/nf_core/pipelines/download/container_fetcher.py @@ -0,0 +1,447 @@ +import contextlib +import logging +import re +import shutil +from abc import ABC, abstractmethod +from collections.abc import Collection, Container, Generator, Iterable +from pathlib import Path +from typing import Callable, Optional, Union + +import rich.progress + +from nf_core.pipelines.download.utils import intermediate_file + +log = logging.getLogger(__name__) + + +class ContainerProgress(rich.progress.Progress): + """ + Custom Progress bar class, allowing us to have two progress + bars with different columns / layouts. + Also provide helper functions to control the top-level task. + """ + + main_task: Optional[rich.progress.TaskID] = None + remote_fetch_task: Optional[rich.progress.TaskID] = None + remote_fetch_task_containers: Optional[list[str]] = [] + copy_task: Optional[rich.progress.TaskID] = None + copy_task_containers: Optional[list[str]] = [] + + def __init__(self, disable=False): + super().__init__(disable=disable) + + def get_task_types_and_columns(self): + """ + Gets the possible task types for the progress bar. 
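+
+        Returns a mapping from each task's ``progress_type`` field to the rich
+        progress column layout used to render tasks of that type.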
+ """ + task_types_and_columns = { + "summary": ( + "[magenta]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>3.0f}%", + "•", + "[green]{task.completed}/{task.total} tasks completed", + ), + "remote_fetch": ( + "[cyan]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>3.0f}%", + "•", + "[green]{task.completed}/{task.total} tasks completed", + ), + "copy": ( + "[steel_blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>3.0f}%", + "•", + "[green]{task.completed}/{task.total} tasks completed", + ), + } + return task_types_and_columns + + def get_renderables(self) -> Generator[rich.table.Table, None, None]: + self.columns: Iterable[Union[str, rich.progress.ProgressColumn]] + for task in self.tasks: + for task_type, columns in self.get_task_types_and_columns().items(): + if task.fields.get("progress_type") == task_type: + self.columns = columns + + yield self.make_tasks_table([task]) + + # These two functions allow callers not having to track the main TaskID + # They are pass-through functions to the rich.progress methods + def add_main_task(self, **kwargs) -> rich.progress.TaskID: + """ + Add a top-level task to the progress bar. + This task will be used to track the overall progress of the container downloads. + """ + self.main_task = self.add_task( + progress_type="summary", + description="Processing container images", + **kwargs, + ) + return self.main_task + + def update_main_task(self, **kwargs) -> None: + """ + Update the top-level task with new information. + """ + self.update(self.main_task, **kwargs) + + def remove_main_task(self) -> None: + """ + Remove the top-level task + """ + self.remove_task(self.main_task) + + def add_remote_fetch_task(self, total: int, **kwargs) -> rich.progress.TaskID: + """ + Add a task to the progress bar to track the progress of fetching remote containers. + """ + self.remote_fetch_task = self.add_task( + progress_type="remote_fetch", + total=total, + completed=0, + **kwargs, + ) + return self.remote_fetch_task + + def advance_remote_fetch_task(self) -> None: + """ + Advance the remote fetch task, and if the container should not + be copied then also advance the main task. + """ + self.update(self.remote_fetch_task, advance=1) + self.update_main_task(advance=1) + + def update_remote_fetch_task(self, **kwargs) -> None: + """ + Update the remote fetch task with new information + """ + self.update(self.remote_fetch_task, **kwargs) + + def remove_remote_fetch_task(self) -> None: + """ + Remove the remote fetch task + """ + self.remove_task(self.remote_fetch_task) + + def add_copy_task(self, total: int, **kwargs) -> rich.progress.TaskID: + """ + Add a task to the progress bar to track the progress of copying containers. + """ + self.copy_task = self.add_task( + progress_type="copy", + total=total, + completed=0, + **kwargs, + ) + return self.copy_task + + def advance_copy_task(self) -> None: + """ + Advance the copy task, and with it the main task. 
+        """
+        self.update(self.copy_task, advance=1)
+        self.update_main_task(advance=1)
+
+    def update_copy_task(self, **kwargs) -> None:
+        """
+        Update the copy task with new information
+        """
+        self.update(self.copy_task, **kwargs)
+
+    def remove_copy_task(self) -> None:
+        """
+        Remove the copy task
+        """
+        self.remove_task(self.copy_task)
+
+    @contextlib.contextmanager
+    def sub_task(self, *args, **kwargs) -> Generator[rich.progress.TaskID, None, None]:
+        """
+        Context manager to create a sub-task under the main task.
+        """
+        task = self.add_task(*args, **kwargs)
+        try:
+            yield task
+        finally:
+            self.remove_task(task)
+
+
+class ContainerFetcher(ABC):
+    """
+    Abstract class to manage all operations for fetching containers.
+
+    It is currently subclassed by the SingularityFetcher and DockerFetcher classes,
+    for fetching Singularity and Docker containers respectively.
+
+    The guiding principles are that:
+    - Container download/pull/copy methods are unaware of the concepts of
+      "library" and "cache". They are just told to fetch a container and
+      put it in a certain location.
+    - Only the `fetch_containers` method is aware of the concepts of "library"
+      and "cache". It is a sort of orchestrator that decides where to fetch
+      each container and calls the appropriate methods.
+    - All methods are integrated with a progress bar
+
+    Args:
+        container_output_dir (Path): The final destination for the container images.
+        container_library (Iterable[str]): A collection of container libraries to use.
+        registry_set (Iterable[str]): A collection of registries to consider.
+        progress_factory (Callable[[bool], ContainerProgress]): A factory that takes `hide_progress` and returns a progress bar.
+        library_dir (Optional[Path]): The directory to look for container images in.
+        cache_dir (Optional[Path]): A directory where container images might be cached.
+        amend_cachedir (bool): Whether to amend the cache directory with the container images.
+        parallel (int): The number of containers to fetch in parallel.
+        hide_progress (bool): Whether to hide the progress bar.
+    """
+
+    def __init__(
+        self,
+        container_output_dir: Path,
+        container_library: Iterable[str],
+        registry_set: Iterable[str],
+        progress_factory: Callable[[bool], ContainerProgress],
+        library_dir: Optional[Path],
+        cache_dir: Optional[Path],
+        amend_cachedir: bool,
+        parallel: int = 4,
+        hide_progress: bool = False,
+    ) -> None:
+        self._container_output_dir = container_output_dir
+        self.container_library = list(container_library)
+        self.registry_set = registry_set
+        self.kill_with_fire = False
+        self.implementation: Optional[str] = None
+        self.name = None
+        self.library_dir = library_dir
+        self.cache_dir = cache_dir
+        self.amend_cachedir = amend_cachedir
+        self.parallel = parallel
+
+        self.hide_progress = hide_progress
+        self.progress_factory = progress_factory
+        self.progress: Optional[ContainerProgress] = None
+
+    @property
+    def progress(self) -> rich.progress.Progress:
+        assert self._progress is not None  # mypy
+        return self._progress
+
+    @progress.setter
+    def progress(self, progress: Optional[ContainerProgress]) -> None:
+        self._progress = progress
+
+    def get_container_output_dir(self) -> Path:
+        """
+        Get the output directory for the container images.
+        """
+        return self._container_output_dir
+
+    @abstractmethod
+    def check_and_set_implementation(self) -> None:
+        """
+        Check if the container system is installed and available.
+
+        Should update the `self.implementation` attribute with the found implementation.
+
+        Raises:
+            OSError: If the container system is not installed or not in $PATH.
+ """ + pass + + @abstractmethod + def clean_container_file_extension(self, container_fn: str) -> str: + """ + Clean the file extension of a container filename. + + Example implementation: + + # Detect file extension + extension = ".img" + if ".sif:" in out_name: + extension = ".sif" + out_name = out_name.replace(".sif:", "-") + elif out_name.endswith(".sif"): + extension = ".sif" + out_name = out_name[:-4] + # Strip : and / characters + out_name = out_name.replace("/", "-").replace(":", "-") + # Add file extension + out_name = out_name + extension + + Args: + container_fn (str): The filename of the container. + Returns: + str: The cleaned filename with the appropriate extension. + """ + pass + + # We have dropped the explicit registries from the modules in favor of the configurable registries. + # Unfortunately, Nextflow still expects the registry to be part of the file name, so we need functions + # to support accessing container images with different registries (or no registry). + def get_container_filename(self, container: str) -> str: + """Return the expected filename for a container. + + Supports docker, http, oras, and singularity URIs in `container`. + + Registry names provided in `registries` are removed from the filename to ensure that the same image + is used regardless of the registry. Only registry names that are part of `registries` are considered. + If the image name contains another registry, it will be kept in the filename. + + For instance, docker.io/nf-core/ubuntu:20.04 will be nf-core-ubuntu-20.04.img *only* if the registry + contains "docker.io". + """ + + # Generate file paths + # Based on simpleName() function in Nextflow code: + # https://github.com/nextflow-io/nextflow/blob/671ae6d85df44f906747c16f6d73208dbc402d49/modules/nextflow/src/main/groovy/nextflow/container/SingularityCache.groovy#L69-L94 + out_name = container + # Strip URI prefix + out_name = re.sub(r"^.*:\/\/", "", out_name) + + # Clean the file extension. This method must be implemented + # by any subclass + out_name = self.clean_container_file_extension(out_name) + + # Trim potential registries from the name for consistency. + # This will allow pipelines to work offline without symlinked images, + # if docker.registry / singularity.registry are set to empty strings at runtime, which can be included in the HPC config profiles easily. + if self.registry_set: + # Create a regex pattern from the set of registries + trim_pattern = "|".join(f"^{re.escape(registry)}-?".replace("/", "[/-]") for registry in self.registry_set) + # Use the pattern to trim the string + out_name = re.sub(f"{trim_pattern}", "", out_name) + + return out_name + + def fetch_containers( + self, + containers: Collection[str], + exclude_list: Container[str], + ): + """ + This is the main entrypoint of the container fetcher. 
It goes through + all the containers we find and does the appropriate action; copying + from cache or fetching from a remote location + """ + + # Create a new progress bar + self.progress = self.progress_factory(self.hide_progress) + + with self.progress: + # Check each container in the list and defer actions + containers_remote_fetch: list[tuple[str, Path]] = [] + containers_copy: list[tuple[str, Path, Path]] = [] + + # The first task is to check what to do with each container + total_tasks = len(containers) + self.progress.add_main_task(total=total_tasks) + + for container in containers: + container_filename = self.get_container_filename(container) + + # Files in the remote cache are already downloaded and can be ignored + if container_filename in exclude_list: + log.debug(f"Skipping download of container '{container_filename}' as it is cached remotely.") + self.progress.update_main_task(advance=1) + continue + + # Generate file paths for all three locations + output_path = self.get_container_output_dir() / container_filename + + if output_path.exists(): + log.debug( + f"Skipping download of container '{container_filename}' as it is already in `{self.get_container_output_dir()}`." + ) + self.progress.update_main_task(advance=1) + continue + + library_path = self.library_dir / container_filename if self.library_dir is not None else None + cache_path = self.cache_dir / container_filename if self.cache_dir is not None else None + + # Get the container from the library + if library_path and library_path.exists(): + # Update the cache if needed + if cache_path and not cache_path.exists() and self.amend_cachedir: + containers_copy.append((container, library_path, cache_path)) + + if not self.amend_cachedir: + # We are not just amending the cache directory, so the file should be copied to the output + containers_copy.append((container, library_path, output_path)) + + # Get the container from the cache + elif cache_path and cache_path.exists() and not self.amend_cachedir: + log.debug(f"Container '{container_filename}' found in cache at '{cache_path}'.") + containers_copy.append((container, cache_path, output_path)) + + # Image is not in library or cache + else: + # Fetching of remote containers, either pulling or downloading, differs between docker and singularity: + # - Singularity images can either be downloaded from an http address, or pulled from a registry with `(singularity|apptainer) pull` + # - Docker images are always pulled, but needs the additional `docker image save` command for the image to be saved in the correct place + if cache_path: + # Download into the cache + containers_remote_fetch.append((container, cache_path)) + + # Do not copy to the output directory "(docker|singularity)-images" if we are solely amending the cache + if not self.amend_cachedir: + containers_copy.append((container, cache_path, output_path)) + total_tasks += 1 + else: + # There is no cache directory so download or pull directly to the output + containers_remote_fetch.append((container, output_path)) + + self.progress.update_main_task(total=total_tasks) + + # Fetch containers from a remote location + if containers_remote_fetch: + self.progress.add_remote_fetch_task( + total=len(containers_remote_fetch), + description=f"Fetch remote {self.implementation} images", + ) + self.fetch_remote_containers(containers_remote_fetch, parallel=self.parallel) + self.progress.remove_remote_fetch_task() + + # Copy containers + if containers_copy: + self.progress.add_copy_task( + total=len(containers_copy), + 
description="Copy container images from/to cache", + ) + for container, src_path, dest_path in containers_copy: + self.copy_image(container, src_path, dest_path) + self.progress.advance_copy_task() + self.progress.remove_copy_task() + + self.progress.remove_main_task() + # Unset the progress bar, so that we get an AssertionError if we access it after it is closed + self.progress = None + + @abstractmethod + def fetch_remote_containers(self, containers: list[tuple[str, Path]], parallel: int = 4) -> None: + """ + Fetch remote containers + + - Singularity: pull or download images, depending on what address we have + - Docker: pull and save images + + This function should update the main progress task accordingly + """ + pass + + def copy_image(self, container: str, src_path: Path, dest_path: Path) -> None: + """Copy container image from one directory to another.""" + # Check that the source path exists + if not src_path.exists(): + log.error(f"Image '{container}' does not exist") + return + + with intermediate_file(dest_path) as dest_path_tmp: + shutil.copyfile(src_path, dest_path_tmp.name) + + def cleanup(self) -> None: + """ + Cleanup any temporary files or resources. + """ + pass diff --git a/nf_core/pipelines/download/docker.py b/nf_core/pipelines/download/docker.py new file mode 100644 index 0000000000..043190edde --- /dev/null +++ b/nf_core/pipelines/download/docker.py @@ -0,0 +1,410 @@ +import concurrent +import concurrent.futures +import itertools +import logging +import re +import select +import shutil +import subprocess +from collections.abc import Iterable +from pathlib import Path +from typing import Optional + +import rich.progress + +import nf_core.utils +from nf_core.pipelines.download.container_fetcher import ContainerFetcher, ContainerProgress + +log = logging.getLogger(__name__) +stderr = rich.console.Console( + stderr=True, + style="dim", + highlight=False, + force_terminal=nf_core.utils.rich_force_colors(), +) + + +class DockerProgress(ContainerProgress): + def get_task_types_and_columns(self): + task_types_and_columns = super().get_task_types_and_columns() + task_types_and_columns.update( + { + "docker": ( + "[magenta]{task.description}", + # "[blue]{task.fields[current_log]}", + rich.progress.BarColumn(bar_width=None), + "([blue]{task.fields[status]})", + ), + } + ) + return task_types_and_columns + + +class DockerFetcher(ContainerFetcher): + """ + Fetcher for Docker containers. + """ + + def __init__( + self, + outdir: Path, + container_library: Iterable[str], + registry_set: Iterable[str], + parallel: int = 4, + hide_progress: bool = False, + ): + """ + Intialize the Docker image fetcher + + """ + container_output_dir = outdir / "docker-images" + super().__init__( + container_output_dir=container_output_dir, + container_library=container_library, + registry_set=registry_set, + progress_factory=DockerProgress, + cache_dir=None, # Docker does not use a cache directory + library_dir=None, # Docker does not use a library directory + amend_cachedir=False, # Docker does not use a cache directory + parallel=parallel, + hide_progress=hide_progress, + ) + + # We will always use Docker, so check if it is installed directly. + self.check_and_set_implementation() + + def check_and_set_implementation(self) -> None: + """ + Check if Docker is installed and set the implementation. 
+        """
+        if not shutil.which("docker"):
+            raise OSError("Docker is needed to pull images, but it is not installed or not in $PATH")
+        self.implementation = "docker"
+
+    def clean_container_file_extension(self, container_fn):
+        """
+        This makes sure that the Docker container filename has a .tar extension
+        """
+        extension = ".tar"
+        # Strip a pre-existing ".tar" suffix (note: str.rstrip() strips characters, not a suffix)
+        container_fn = container_fn.removesuffix(extension)
+        # Strip : and / characters
+        container_fn = container_fn.replace("/", "-").replace(":", "-")
+        # Add file extension
+        container_fn = container_fn + extension
+        return container_fn
+
+    def fetch_remote_containers(self, containers: list[tuple[str, Path]], parallel: int = 4) -> None:
+        """
+        Fetch a set of remote container images.
+
+        This is the main entry point for the subclass, and is called by
+        the `fetch_containers` method in the superclass.
+
+        Args:
+            containers (list[tuple[str, Path]]): A list of container names and output paths.
+            parallel (int): The number of containers to fetch in parallel.
+        """
+        with concurrent.futures.ThreadPoolExecutor(max_workers=parallel) as pool:
+            futures = []
+
+            # Submit one pull-and-save task per container to the pool
+            for container, output_path in containers:
+                future = pool.submit(self.pull_and_save_image, container, output_path)
+                futures.append(future)
+
+            # Make ctrl-c work with multi-threading: set a sentinel that is checked by the subprocesses
+            self.kill_with_fire = False
+
+            # Wait for all pull and save tasks to finish
+            try:
+                for future in concurrent.futures.as_completed(futures):
+                    try:
+                        future.result()  # This will raise an exception if the pull or save failed
+                    except DockerError as e:
+                        log.error(f"Error while processing container {e.container}: {e.message}")
+                    except Exception as e:
+                        log.error(f"Unexpected error: {e}")
+
+            except KeyboardInterrupt:
+                # Cancel the future threads that haven't started yet
+                for future in futures:
+                    future.cancel()
+                # Set the sentinel to True to pass the signal to subprocesses
+                self.kill_with_fire = True
+                # Re-raise exception on the main thread
+                raise
+
+    def pull_and_save_image(self, container: str, output_path: Path) -> None:
+        """
+        Pull a docker image and then save it
+
+        Args:
+            container (str): The container name.
+            output_path (Path): The path to save the container image.
+        """
+        # Progress bar to show that something is happening
+        container_short_name = container.split("/")[-1][:50]
+        task = self.progress.add_task(
+            f"Fetching '{container_short_name}'",
+            progress_type="docker",
+            current_log="",
+            total=2,
+            status="Pulling",
+        )
+
+        try:
+            self.pull_image(container, task)
+        except (DockerError.InvalidTagError, DockerError.ImageNotFoundError) as e:
+            log.error(e.message)
+        except DockerError.OtherError as e:
+            # Try other registries
+            log.error(e.message)
+            log.error(e.helpmessage)
+
+        # Update progress bar
+        self.progress.advance(task)
+        self.progress.update(task, status="Saving")
+        # self.progress.update(task, description=f"Saving '{container_short_name}'")
+
+        # Save the image
+        self.save_image(container, output_path, task)
+
+        # Update progress bar
+        self.progress.advance(task)
+        self.progress.remove_task(task)
+
+        # Task should advance in any case. Failure to pull will not kill the pulling process.
+        self.progress.advance_remote_fetch_task()
+
+    def construct_pull_command(self, address: str) -> list[str]:
+        """
+        Construct the command to pull a Docker image.
+
+        Args:
+            address (str): The address of the container to pull.
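+
+        Returns:
+            list[str]: The `docker image pull` command as a list of arguments.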
+ """ + pull_command = ["docker", "image", "pull", address] + log.debug(f"Docker command: {' '.join(pull_command)}") + return pull_command + + def pull_image(self, container: str, progress_task: rich.progress.Task) -> None: + """ + Pull a single Docker image from a registry. + + Args: + container (str): The container. Should be the full address of the container e.g. `quay.io/biocontainers/name:version` + output_path (str): The final local output path + prev_pull_future (concurrent.futures.Future, None): A future that is used to wait for the previous pull task to finish. + """ + # Try pulling the image from the specified address + pull_command = self.construct_pull_command(container) + log.debug(f"Pulling docker image: {container}") + self._run_docker_command(pull_command, container, None, container, progress_task) + + def construct_save_command(self, output_path: Path, address: str) -> list[str]: + """ + Construct the command to save a Docker image. + + Args: + output_path (Path): The path to save the container image. + address (str): The address of the container to save. + """ + save_command = [ + "docker", + "image", + "save", + address, + "--output", + str(output_path), + ] + return save_command + + def save_image(self, container: str, output_path: Path, progress_task: rich.progress.Task) -> None: + """Save a Docker image that has been pulled to a file. + + Args: + container (str): A pipeline's container name. Usually it is of similar format + to ``biocontainers/name:tag`` + out_path (str): The final target output path + cache_path (str, None): The NXF_DOCKER_CACHEDIR path if set, None if not + wait_future (concurrent.futures.Future, None): A future that is used to wait for the previous pull task to finish. + """ + log.debug(f"Saving Docker image '{container}' to {output_path}") + address = container + save_command = self.construct_save_command(output_path, address) + self._run_docker_command(save_command, container, output_path, address, progress_task) + + def _run_docker_command( + self, + command: list[str], + container: str, + output_path: Optional[Path], + address: str, + progress_task: rich.progress.Task, + ) -> None: + """ + Internal command to run docker commands and error handle them properly + """ + with subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True, + bufsize=1, + ) as proc: + # Monitor the process: + # - read lines if there are any, + # - check if we should kill it, + # - update the progress bar + lines = [] + while True: + if self.kill_with_fire: + proc.kill() + raise KeyboardInterrupt("Docker command was cancelled by user") + + rlist, _, _ = select.select([proc.stdout], [], [], 0.1) + if rlist and proc.stdout is not None: + line = proc.stdout.readline() + if line: + lines.append(line) + self.progress.update(progress_task, current_log=line.strip()) + elif proc.poll() is not None: + # Process has finished, break the loop + break + elif proc.poll() is not None: + # Process has finished, break the loop + break + log.debug( + f"Docker command '{' '.join(command)}' finished with return code {proc.returncode}. Waiting for it to exit." 
+ ) + proc.wait() + log.debug(f"Docker command '{' '.join(command)}' has exited.") + + if lines: + # something went wrong with the container retrieval + possible_error_lines = { + "invalid reference format", + "Error response from daemon:", + } + if any(pel in line for pel in possible_error_lines for line in lines): + self.progress.remove_task(progress_task) + raise DockerError( + container=container, + address=address, + out_path=output_path, + command=command, + error_msg=lines, + ) + + def cleanup(self) -> None: + """ + Cleanup by writing the load message to the screen + """ + super().cleanup() + self.write_docker_load_message() + + def write_docker_load_message(self) -> None: + """ + Write a message to the user about how to load the downloaded docker images into the offline docker daemon + """ + # There is not direct Nextflow support for loading docker images like we do for Singularity + # Instead we give the user a `bash` command to load the downloaded docker images into the offline docker daemon + # Courtesy of @vmkalbskopf in https://github.com/nextflow-io/nextflow/discussions/4708 + # TODO: Should we create a bash script instead? + docker_load_command = "ls -1 *.tar | xargs --no-run-if-empty -L 1 docker load -i" + indent_spaces = 4 + docker_img_dir = self.get_container_output_dir() + stderr.print( + "\n" + + (1 * indent_spaces * " " + f"Downloaded docker images written to [blue not bold]'{docker_img_dir}'[/]. ") + + (0 * indent_spaces * " " + "After copying the pipeline and images to the offline machine, run\n\n") + + (2 * indent_spaces * " " + f"[blue bold]{docker_load_command}[/]\n\n") + + ( + 1 * indent_spaces * " " + + f"inside [blue not bold]'{docker_img_dir}'[/] to load the images into the offline docker daemon." + ) + + "\n" + ) + + +# Distinct errors for the docker container fetching, required for acting on the exceptions +class DockerError(Exception): + """A class of errors related to pulling containers with Docker""" + + def __init__( + self, + container, + address, + out_path, + command, + error_msg, + ): + self.container = container + self.address = address + self.out_path = out_path + self.command = command + self.error_msg = error_msg + self.message = None + + error_patterns = { + r"reference does not exist": self.ImageNotPulledError, + r"repository does not exist": self.ImageNotFoundError, + r"Error response from daemon: Head .*: denied": self.ImageNotFoundError, + r"manifest unknown": self.InvalidTagError, + } + + for line, (pattern, error_class) in itertools.product(error_msg, error_patterns.items()): + if re.search(pattern, line): + self.error_type = error_class(self) + break + else: + self.error_type = self.OtherError(self) + + log.error(self.error_type.message) + log.info(self.error_type.helpmessage) + log.debug(f"Failed command:\n{' '.join(self.command)}") + log.debug(f"Docker error messages:\n{''.join(error_msg)}") + + raise self.error_type + + class ImageNotPulledError(AttributeError): + """Docker is trying to save an image that was not pulled""" + + def __init__(self, error_log): + self.error_log = error_log + self.message = f'[bold red] Cannot save "{self.error_log.container}" as it was not pulled [/]\n' + self.helpmessage = "Please pull the image first and confirm that it can be pulled.\n" + super().__init__(self.message) + + class ImageNotFoundError(FileNotFoundError): + """The image can not be found in the registry""" + + def __init__(self, error_log): + self.error_log = error_log + self.message = f'[bold red]"The pipeline requested the download of 
non-existing container image "{self.error_log.address}"[/]\n' + self.helpmessage = ( + f'Please try to rerun \n"{" ".join(self.error_log.command)}" manually with a different registry.f\n' + ) + + super().__init__(self.message) + + class InvalidTagError(AttributeError): + """Image and registry are valid, but the (version) tag is not""" + + def __init__(self, error_log): + self.error_log = error_log + self.message = f'[bold red]"{self.error_log.address.split(":")[-1]}" is not a valid tag of "{self.error_log.container}"[/]\n' + self.helpmessage = f'Please chose a different library than {self.error_log.address}\nor try to locate the "{self.error_log.address.split(":")[-1]}" version of "{self.error_log.container}" manually.\nPlease troubleshoot the command \n"{" ".join(self.error_log.command)}" manually.\n' + super().__init__(self.message) + + class OtherError(RuntimeError): + """Undefined error with the container""" + + def __init__(self, error_log): + self.error_log = error_log + self.message = f'[bold red]"The pipeline requested the download of non-existing container image "{self.error_log.address}"[/]\n' + self.helpmessage = ( + f'Please try to rerun \n"{" ".join(self.error_log.command)}" manually with a different registry.\n' + ) + super().__init__(self.message, self.helpmessage, self.error_log) diff --git a/nf_core/pipelines/download/download.py b/nf_core/pipelines/download/download.py new file mode 100644 index 0000000000..d75fad4c63 --- /dev/null +++ b/nf_core/pipelines/download/download.py @@ -0,0 +1,772 @@ +"""Downloads a nf-core pipeline to the local file system.""" + +import io +import json +import logging +import os +import re +import shutil +import tarfile +from datetime import datetime +from pathlib import Path +from typing import Any, Literal, Optional, Union +from zipfile import ZipFile + +import questionary +import requests +import rich + +import nf_core +import nf_core.pipelines.list +import nf_core.utils +from nf_core.pipelines.download.container_fetcher import ContainerFetcher +from nf_core.pipelines.download.docker import DockerFetcher +from nf_core.pipelines.download.singularity import SINGULARITY_CACHE_DIR_ENV_VAR, SingularityFetcher +from nf_core.pipelines.download.utils import DownloadError +from nf_core.pipelines.download.workflow_repo import WorkflowRepo +from nf_core.utils import ( + NF_INSPECT_MIN_NF_VERSION, + NFCORE_VER_LAST_WITHOUT_NF_INSPECT, + check_nextflow_version, + pretty_nf_version, + run_cmd, +) + +log = logging.getLogger(__name__) +stderr = rich.console.Console( + stderr=True, + style="dim", + highlight=False, + force_terminal=nf_core.utils.rich_force_colors(), +) + + +class DownloadWorkflow: + """Downloads a nf-core workflow from GitHub to the local file system. + + Can also download its Singularity container image if required. + + Args: + pipeline (Optional[str]): A nf-core pipeline name. + revision (Optional[Union[tuple[str], str]]): The workflow revision(s) to download, like `1.0` or `dev` . Defaults to None. + outdir (Optional[Path]): Path to the local download directory. Defaults to None. + compress_type (Optional[str]): Type of compression for the downloaded files. Defaults to None. + force (bool): Flag to force download even if files already exist (overwrite existing files). Defaults to False. + platform (bool): Flag to customize the download for Seqera Platform (convert to git bare repo). Defaults to False. + download_configuration (Optional[str]): Download the configuration files from nf-core/configs. Defaults to None. 
+ additional_tags (Optional[Union[list[str], str]]): Specify additional tags to add to the downloaded pipeline. Defaults to None. + container_system (str): The container system to use (e.g., "singularity"). Defaults to None. + container_library (Optional[Union[tuple[str], str]]): The container libraries (registries) to use. Defaults to None. + container_cache_utilisation (Optional[str]): If a local or remote cache of already existing container images should be considered. Defaults to None. + container_cache_index (Optional[Path]): An index for the remote container cache. Defaults to None. + parallel (int): The number of parallel downloads to use. Defaults to 4. + hide_progress (bool): Flag to hide the progress bar. Defaults to False. + """ + + def __init__( + self, + pipeline: Optional[str] = None, + revision: Optional[Union[tuple[str, ...], str]] = None, + outdir: Optional[Path] = None, + compress_type: Optional[str] = None, + force: bool = False, + platform: bool = False, + download_configuration: Optional[str] = None, + additional_tags: Optional[Union[tuple[str, ...], str]] = None, + container_system: Optional[str] = None, + container_library: Optional[Union[tuple[str, ...], str]] = None, + container_cache_utilisation: Optional[str] = None, + container_cache_index: Optional[Path] = None, + parallel: int = 4, + hide_progress: bool = False, + ): + # Verify that the flags provided make sense together + if ( + container_system == "docker" + and container_cache_utilisation != "copy" + and container_cache_utilisation is not None + ): + raise DownloadError( + "Only the 'copy' option for --container-cache-utilisation is supported for Docker images. " + ) + + self._pipeline = pipeline + if isinstance(revision, str): + self.revision = [revision] + elif isinstance(revision, tuple): + self.revision = [*revision] + else: + self.revision = [] + self._outdir: Optional[Path] = Path(outdir) if outdir is not None else None + self.output_filename: Optional[Path] = None + + self.compress_type = compress_type + self.force = force + self.hide_progress = hide_progress + self.platform = platform + self.fullname: Optional[str] = None + # downloading configs is not supported for Seqera Platform downloads. + self.include_configs = True if download_configuration == "yes" and not bool(platform) else False + # Additional tags to add to the downloaded pipeline. This enables to mark particular commits or revisions with + # additional tags, e.g. "stable", "testing", "validated", "production" etc. Since this requires a git-repo, it is only + # available for the bare / Seqera Platform download. 
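+        # Each additional tag is expected in the form "<existing ref>=<new tag>", e.g. "1.0.0=stable"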
+        self.additional_tags: Optional[list[str]]
+        if isinstance(additional_tags, str) and bool(len(additional_tags)) and self.platform:
+            self.additional_tags = [additional_tags]
+        elif isinstance(additional_tags, tuple) and bool(len(additional_tags)) and self.platform:
+            self.additional_tags = [*additional_tags]
+        else:
+            self.additional_tags = None
+
+        self.container_system = container_system
+        self.container_fetcher: Optional[ContainerFetcher] = None
+        # Check if a cache or libraries were specified even though singularity was not selected
+        if self.container_system != "singularity":
+            if container_cache_index:
+                log.warning("The flag '--container-cache-index' is set, but fetching singularity images was not selected")
+                self.prompt_use_singularity(
+                    "The '--container-cache-index' flag is only applicable when fetching singularity images"
+                )
+
+            if container_library:
+                log.warning("You have specified container libraries but not selected to fetch singularity images")
+                self.prompt_use_singularity(
+                    "The '--container-library' flag is only applicable when fetching singularity images"
+                )  # Is this correct?
+
+        # Manually specified container library (registry)
+        if isinstance(container_library, str) and bool(len(container_library)):
+            self.container_library = [container_library]
+        elif isinstance(container_library, tuple) and bool(len(container_library)):
+            self.container_library = [*container_library]
+        else:
+            self.container_library = ["quay.io"]
+        # Create a new set and add all values from self.container_library (CLI arguments to --container-library)
+        self.registry_set = set(self.container_library) if hasattr(self, "container_library") else set()
+        # if a container_cache_index is given, use the file and overrule choice.
+        self.container_cache_utilisation = "remote" if container_cache_index else container_cache_utilisation
+        self.container_cache_index = container_cache_index
+        # allows specifying a container library / registry or a respective mirror to download images from
+        self.parallel = parallel
+
+        self.wf_revisions: list[dict[str, Any]] = []
+        self.wf_branches: dict[str, Any] = {}
+        self.wf_sha: dict[str, str] = {}
+        self.wf_download_url: dict[str, str] = {}
+        self.nf_config: dict[str, str] = {}
+        self.containers: list[str] = []
+        self.containers_remote: list[str] = []  # stores the remote images provided in the file.
+
+        # Fetch remote workflows
+        self.wfs = nf_core.pipelines.list.Workflows()
+        self.wfs.get_remote_workflows()
+
+    @property
+    def pipeline(self) -> str:
+        """
+        Get the pipeline name.
+        """
+        assert self._pipeline is not None  # mypy
+        return self._pipeline
+
+    @pipeline.setter
+    def pipeline(self, pipeline: str) -> None:
+        """
+        Set the pipeline name.
+        """
+        self._pipeline = pipeline
+
+    @property
+    def outdir(self) -> Path:
+        """
+        Get the output directory for the download.
+        """
+        assert self._outdir is not None  # mypy
+        return self._outdir
+
+    @outdir.setter
+    def outdir(self, outdir: Path) -> None:
+        """
+        Set the output directory for the download.
+        """
+        self._outdir = outdir
+
+    def download_workflow(self) -> None:
+        """Starts a nf-core workflow download."""
+
+        # Get workflow details
+        try:
+            self.prompt_pipeline_name()
+            self.pipeline, self.wf_revisions, self.wf_branches = nf_core.utils.get_repo_releases_branches(
+                self.pipeline, self.wfs
+            )
+            self.prompt_revision()
+            self.get_revision_hash()
+
+            # After this point the outdir should be set
+            assert self.outdir is not None  # mypy
+
+            # Inclusion of configs is unnecessary for Seqera Platform.
+ if not self.platform and self.include_configs is None: + self.prompt_config_inclusion() + # Prompt the user for whether containers should be downloaded + if self.container_system is None: + self.prompt_container_download() + + # Check if we have an outdated Nextflow version + if ( + self.container_system is not None + and self.container_system != "none" + and not check_nextflow_version(NF_INSPECT_MIN_NF_VERSION) + ): + log.error( + f"Container download requires Nextflow version >= {pretty_nf_version(NF_INSPECT_MIN_NF_VERSION)}\n" + f"Please update your Nextflow version with [magenta]'nextflow self-update'[/]\n" + f"or use a version of 'nf-core/tools' <= {'.'.join([str(i) for i in NFCORE_VER_LAST_WITHOUT_NF_INSPECT])}" + ) + return + + # Setup the appropriate ContainerFetcher object + self.setup_container_fetcher() + + # Nothing meaningful to compress here. + if not self.platform: + self.prompt_compression_type() + except AssertionError as e: + raise DownloadError(e) from e + + summary_log = [ + f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0] + ',[' + str(len(self.revision) - 2) + ' more revisions],' + self.revision[-1]}'", + f"Use containers: '{self.container_system}'", + ] + if self.container_system: + summary_log.append(f"Container library: '{', '.join(self.container_library)}'") + if self.container_system == "singularity" and os.environ.get(SINGULARITY_CACHE_DIR_ENV_VAR) is not None: + summary_log.append( + f"Using [blue]{SINGULARITY_CACHE_DIR_ENV_VAR}[/]': {os.environ[SINGULARITY_CACHE_DIR_ENV_VAR]}'" + ) + if self.containers_remote: + summary_log.append( + f"Successfully read {len(self.containers_remote)} containers from the remote '{SINGULARITY_CACHE_DIR_ENV_VAR}' contents." + ) + + # Set an output filename now that we have the outdir + if self.platform: + self.output_filename = self.outdir.parent / (self.outdir.name + ".git") + summary_log.append(f"Output file: '{self.output_filename}'") + elif self.compress_type is not None: + self.output_filename = self.outdir.parent / (self.outdir.name + "." + self.compress_type) + summary_log.append(f"Output file: '{self.output_filename}'") + else: + summary_log.append(f"Output directory: '{self.outdir}'") + + if not self.platform: + # Only show entry, if option was prompted. 
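As an aside, the version gate above exists because container discovery is now delegated to `nextflow inspect`. A minimal sketch of the JSON shape that `find_container_images` parses further down in this file; the process names and image URIs below are invented for illustration:

```python
import json

# Hypothetical `nextflow inspect -format json` output; the structure mirrors
# what `find_container_images` expects, the values are made up.
inspect_output = """
{
  "processes": [
    {"name": "NFCORE_DEMO:DEMO:FASTQC", "container": "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0"},
    {"name": "NFCORE_DEMO:DEMO:FASTQC_TRIM", "container": "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0"},
    {"name": "NFCORE_DEMO:DEMO:MULTIQC", "container": "quay.io/biocontainers/multiqc:1.25--pyhdfd78af_0"}
  ]
}
"""

processes = json.loads(inspect_output)["processes"]
named_containers = {proc["name"]: proc["container"] for proc in processes}
containers = sorted(set(named_containers.values()))  # duplicate images collapse to one entry
print(containers)
```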
+ summary_log.append(f"Include default institutional configuration: '{self.include_configs}'") + else: + summary_log.append(f"Enabled for Seqera Platform: '{self.platform}'") + + # Check that the outdir doesn't already exist + if self.outdir.exists(): + if not self.force: + raise DownloadError( + f"Output directory '{self.outdir}' already exists (use [red]--force[/] to overwrite)" + ) + log.warning(f"Deleting existing output directory: '{self.outdir}'") + shutil.rmtree(self.outdir) + + # Check that compressed output file doesn't already exist + if self.output_filename and self.output_filename.exists(): + if not self.force: + raise DownloadError( + f"Output file '{self.output_filename}' already exists (use [red]--force[/] to overwrite)" + ) + log.warning(f"Deleting existing output file: '{self.output_filename}'") + self.output_filename.unlink() + + # Summary log + log_lines = "\n".join(summary_log) + log.info(f"Saving '{self.pipeline}'\n{log_lines}") + + # Perform the actual download + if self.platform: + log.info("Downloading workflow for Seqera Platform") + self.download_workflow_platform() + else: + log.info("Downloading workflow") + self.download_workflow_static() + + # The container fetcher might have some clean-up code, call it + if self.container_fetcher: + self.container_fetcher.cleanup() + + def download_workflow_static(self) -> None: + """Downloads a nf-core workflow from GitHub to the local file system in a self-contained manner.""" + + # Download the centralised configs first + if self.include_configs: + log.info("Downloading centralised configs from GitHub") + self.download_configs() + + # Download the pipeline files for each selected revision + log.info("Downloading workflow files from GitHub") + + for revision, wf_sha, download_url in zip(self.revision, self.wf_sha.values(), self.wf_download_url.values()): + revision_dirname = self.download_wf_files(revision=revision, wf_sha=wf_sha, download_url=download_url) + + if self.include_configs: + try: + self.wf_use_local_configs(revision_dirname) + except FileNotFoundError as e: + raise DownloadError("Error editing pipeline config file to use local configs!") from e + + # Collect all required container images + if self.container_system in {"singularity", "docker"}: + self.find_container_images(self.outdir / revision_dirname, revision) + self.gather_registries(self.outdir / revision_dirname) + + try: + self.download_container_images(current_revision=revision) + except OSError as e: + raise DownloadError(f"[red]{e}[/]") from e + + # Compress into an archive + if self.compress_type is not None: + log.info("Compressing output into archive") + self.compress_download() + + def download_workflow_platform(self, location: Optional[Path] = None) -> None: + """Create a bare-cloned git repository of the workflow, so it can be launched with `tw launch` as file:/ pipeline""" + assert self.output_filename is not None # mypy + + log.info("Collecting workflow from GitHub") + + self.workflow_repo = WorkflowRepo( + remote_url=f"https://github.com/{self.pipeline}.git", + revision=self.revision if self.revision else None, + commit=self.wf_sha.values() if bool(self.wf_sha) else None, + additional_tags=self.additional_tags, + location=location if location else None, # manual location is required for the tests to work + in_cache=False, + ) + + # Remove tags for those revisions that had not been selected + self.workflow_repo.tidy_tags_and_branches() + + # create a bare clone of the modified repository needed for Seqera Platform + 
self.workflow_repo.bare_clone(self.outdir / self.output_filename) + + # extract the required containers + if self.container_system in {"singularity", "docker"}: + for revision, commit in self.wf_sha.items(): + # Checkout the repo in the current revision + self.workflow_repo.checkout(commit) + # Collect all required singularity images + self.find_container_images(self.workflow_repo.access(), revision) + self.gather_registries(self.workflow_repo.access()) + + try: + self.download_container_images(current_revision=revision) + except OSError as e: + raise DownloadError(f"[red]{e}[/]") from e + + # Justify why compression is skipped for Seqera Platform downloads (Prompt is not shown, but CLI argument could have been set) + if self.compress_type is not None: + log.info( + "Compression choice is ignored for Seqera Platform downloads since nothing can be reasonably compressed." + ) + + def prompt_pipeline_name(self) -> None: + """Prompt for the pipeline name if not set with a flag""" + + if self._pipeline is None: + stderr.print("Specify the name of a nf-core pipeline or a GitHub repository name (user/repo).") + self.pipeline = nf_core.utils.prompt_remote_pipeline_name(self.wfs) + + def prompt_revision(self) -> None: + """ + Prompt for pipeline revision / branch + Prompt user for revision tag if '--revision' was not set + If --platform is specified, allow to select multiple revisions + Also the static download allows for multiple revisions, but + we do not prompt this option interactively. + """ + if not bool(self.revision): + (choice, tag_set) = nf_core.utils.prompt_pipeline_release_branch( + self.wf_revisions, self.wf_branches, multiple=self.platform + ) + """ + The checkbox() prompt unfortunately does not support passing a Validator, + so a user who keeps pressing Enter will flounder past the selection without choice. + + bool(choice), bool(tag_set): + ############################# + True, True: A choice was made and revisions were available. + False, True: No selection was made, but revisions were available -> defaults to all available. + False, False: No selection was made because no revisions were available -> raise AssertionError. + True, False: Congratulations, you found a bug! That combo shouldn't happen. + """ + + if bool(choice): + # have to make sure that self.revision is a list of strings, regardless if choice is str or list of strings. 
+ (self.revision.append(choice) if isinstance(choice, str) else self.revision.extend(choice)) + else: + if bool(tag_set): + self.revision = tag_set + log.info("No particular revision was selected, all available will be downloaded.") + else: + raise AssertionError(f"No revisions of {self.pipeline} available for download.") + + def get_revision_hash(self) -> None: + """Find specified revision / branch / commit hash""" + + for revision in self.revision: # revision is a list of strings, but may be of length 1 + # Branch + if revision in self.wf_branches.keys(): + self.wf_sha = {**self.wf_sha, revision: self.wf_branches[revision]} + + else: + # Revision + for r in self.wf_revisions: + if r["tag_name"] == revision: + self.wf_sha = {**self.wf_sha, revision: r["tag_sha"]} + break + + else: + # Commit - full or short hash + if commit_id := nf_core.utils.get_repo_commit(self.pipeline, revision): + self.wf_sha = {**self.wf_sha, revision: commit_id} + continue + + # Can't find the revisions or branch - throw an error + log.info( + "Available {} revisions: '{}'".format( + self.pipeline, + "', '".join([r["tag_name"] for r in self.wf_revisions]), + ) + ) + log.info("Available {} branches: '{}'".format(self.pipeline, "', '".join(self.wf_branches.keys()))) + raise AssertionError( + f"Not able to find revision / branch / commit '{revision}' for {self.pipeline}" + ) + + # Set the outdir + if not self._outdir: + if len(self.wf_sha) > 1: + self.outdir = Path( + f"{self.pipeline.replace('/', '-').lower()}_{datetime.now().strftime('%Y-%m-%d_%H-%M')}" + ) + else: + self.outdir = Path(f"{self.pipeline.replace('/', '-').lower()}_{self.revision[0]}") + + if not self.platform: + for revision, wf_sha in self.wf_sha.items(): + # Set the download URL and return - only applicable for classic downloads + self.wf_download_url = { + **self.wf_download_url, + revision: f"https://github.com/{self.pipeline}/archive/{wf_sha}.zip", + } + + def prompt_config_inclusion(self) -> None: + """Prompt for inclusion of institutional configurations""" + if stderr.is_interactive: # Use rich auto-detection of interactive shells + self.include_configs = questionary.confirm( + "Include the nf-core's default institutional configuration files into the download?", + style=nf_core.utils.nfcore_question_style, + ).ask() + else: + self.include_configs = False + # do not include by default. 
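For orientation, the per-revision SHA and download-URL maps built in `get_revision_hash` end up looking roughly like this (the pipeline name, tags and hashes below are placeholders):

```python
# Placeholder values, only to illustrate the shape of the maps built above
pipeline = "nf-core/demo"
wf_sha = {"1.0.1": "0697579", "dev": "ab12cd3"}

wf_download_url = {
    revision: f"https://github.com/{pipeline}/archive/{sha}.zip"
    for revision, sha in wf_sha.items()
}
# {'1.0.1': 'https://github.com/nf-core/demo/archive/0697579.zip',
#  'dev': 'https://github.com/nf-core/demo/archive/ab12cd3.zip'}
```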
+ + def prompt_container_download(self) -> None: + """Prompt whether to download container images or not""" + + if self.container_system is None and stderr.is_interactive and not self.platform: + stderr.print("\nIn addition to the pipeline code, this tool can download software containers.") + self.container_system = questionary.select( + "Download software container images:", + choices=["none", "singularity", "docker"], + style=nf_core.utils.nfcore_question_style, + ).unsafe_ask() + + def setup_container_fetcher(self) -> None: + """ + Create the appropriate ContainerFetcher object + """ + assert self.outdir is not None # mypy + if self.container_system == "singularity": + self.container_fetcher = SingularityFetcher( + outdir=self.outdir, + container_library=self.container_library, + registry_set=self.registry_set, + container_cache_utilisation=self.container_cache_utilisation, + container_cache_index=self.container_cache_index, + parallel=self.parallel, + hide_progress=self.hide_progress, + ) + elif self.container_system == "docker": + self.container_fetcher = DockerFetcher( + outdir=self.outdir, + registry_set=self.registry_set, + container_library=self.container_library, + parallel=self.parallel, + hide_progress=self.hide_progress, + ) + else: + self.container_fetcher = None + + def prompt_use_singularity(self, fail_message: str) -> None: + use_singularity = questionary.confirm( + "Do you want to download singularity images?", + style=nf_core.utils.nfcore_question_style, + ).ask() + if use_singularity: + self.container_system = "singularity" + else: + raise DownloadError(fail_message) + + def prompt_compression_type(self) -> None: + """Ask user if we should compress the downloaded files""" + if self.compress_type is None: + stderr.print( + "\nIf transferring the downloaded files to another system, it can be convenient to have everything compressed in a single file." + ) + if self.container_system == "singularity": + stderr.print( + "[bold]This is [italic]not[/] recommended when downloading Singularity images, as it can take a long time and saves very little space." 
+ ) + self.compress_type = questionary.select( + "Choose compression type:", + choices=[ + "none", + "tar.gz", + "tar.bz2", + "zip", + ], + style=nf_core.utils.nfcore_question_style, + ).unsafe_ask() + + # Correct type for no-compression + if self.compress_type == "none": + self.compress_type = None + + def download_wf_files(self, revision: str, wf_sha: str, download_url: str) -> str: + """Downloads workflow files from GitHub to the :attr:`self.outdir`.""" + log.debug(f"Downloading {download_url}") + + # Download GitHub zip file into memory and extract + url = requests.get(download_url) + with ZipFile(io.BytesIO(url.content)) as zipfile: + zipfile.extractall(self.outdir) + + # create a filesystem-safe version of the revision name for the directory + revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", revision) + # account for name collisions, if there is a branch / release named "configs" or container output dir + if revision_dirname in ["configs", self.get_container_output_dir()]: + revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", self.pipeline + revision_dirname) + + # Rename the internal directory name to be more friendly + gh_name = f"{self.pipeline}-{wf_sha if bool(wf_sha) else ''}".split("/")[-1] + ((self.outdir / gh_name).rename(self.outdir / revision_dirname),) + + # Make downloaded files executable + for dirpath, _, filelist in os.walk(self.outdir / revision_dirname): + for fname in filelist: + (Path(dirpath) / fname).chmod(0o775) + + return revision_dirname + + def download_configs(self) -> None: + """Downloads the centralised config profiles from nf-core/configs to :attr:`self.outdir`.""" + configs_zip_url = "https://github.com/nf-core/configs/archive/master.zip" + configs_local_dir = "configs-master" + log.debug(f"Downloading {configs_zip_url}") + + # Download GitHub zip file into memory and extract + url = requests.get(configs_zip_url) + with ZipFile(io.BytesIO(url.content)) as zipfile: + zipfile.extractall(self.outdir) + + # Rename the internal directory name to be more friendly + (self.outdir / configs_local_dir).rename(self.outdir / "configs") + + # Make downloaded files executable + + for dirpath, _, filelist in os.walk(self.outdir / "configs"): + for fname in filelist: + (Path(dirpath) / fname).chmod(0o775) + + def wf_use_local_configs(self, revision_dirname: str) -> None: + """Edit the downloaded nextflow.config file to use the local config files""" + + assert self.outdir is not None # mypy + nfconfig_fn = (self.outdir / revision_dirname) / "nextflow.config" + find_str = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + repl_str = "${projectDir}/../configs/" + log.debug(f"Editing 'params.custom_config_base' in '{nfconfig_fn}'") + + # Load the nextflow.config file into memory + with open(nfconfig_fn) as nfconfig_fh: + nfconfig = nfconfig_fh.read() + + # Replace the target string + log.debug(f"Replacing '{find_str}' with '{repl_str}'") + nfconfig = nfconfig.replace(find_str, repl_str) + + # Append the singularity.cacheDir to the end if we need it + if self.container_system == "singularity" and self.container_cache_utilisation == "copy": + nfconfig += ( + f"\n\n// Added by `nf-core pipelines download` v{nf_core.__version__} //\n" + + 'singularity.cacheDir = "${projectDir}/../singularity-images/"' + + "\n///////////////////////////////////////" + ) + + # Write the file out again + log.debug(f"Updating '{nfconfig_fn}'") + with open(nfconfig_fn, "w") as nfconfig_fh: + nfconfig_fh.write(nfconfig) + + def find_container_images( + self, 
workflow_directory: Path, revision: str, with_test_containers: bool = True, entrypoint: str = "main.nf" + ) -> None: + """ + Find container image names for workflow using the `nextflow inspect` command. + + Requires Nextflow >= 25.04.4 + + Args: + workflow_directory (Path): The directory containing the workflow files. + entrypoint (str): The entrypoint for the `nextflow inspect` command. + """ + + log.info( + f"Fetching container names for workflow revision {revision} using [magenta bold]nextflow inspect[/]. This might take a while." + ) + try: + # TODO: Select container system via profile. Is this stable enough? + # NOTE: We will likely don't need this after the switch to Seqera containers + profile_str = f"{self.container_system}" + if with_test_containers: + profile_str += ",test,test_full" + profile = f"-profile {profile_str}" if self.container_system else "" + + # Run nextflow inspect + executable = "nextflow" + cmd_params = f"inspect -format json {profile} {workflow_directory / entrypoint}" + cmd_out = run_cmd(executable, cmd_params) + if cmd_out is None: + raise DownloadError("Failed to run `nextflow inspect`. Please check your Nextflow installation.") + + out, _ = cmd_out + out_json = json.loads(out) + # NOTE: Should we save the container name too to have more meta information? + named_containers = {proc["name"]: proc["container"] for proc in out_json["processes"]} + # We only want to process unique containers + self.containers = list(set(named_containers.values())) + + except RuntimeError as e: + log.error("Running 'nextflow inspect' failed with the following error") + raise DownloadError(e) + + except KeyError as e: + log.error("Failed to parse output of 'nextflow inspect' to extract containers") + raise DownloadError(e) + + def gather_registries(self, workflow_directory: Path) -> None: + """Fetch the registries from the pipeline config and CLI arguments and store them in a set. + This is needed to symlink downloaded container images so Nextflow will find them. + """ + + # should exist, because find_container_images() is always called before + if not self.nf_config: + self.nf_config = nf_core.utils.fetch_wf_config(workflow_directory) + + # Select registries defined in pipeline config + configured_registries = [ + "apptainer.registry", + "docker.registry", + "podman.registry", + "singularity.registry", + ] + + for registry in configured_registries: + if registry in self.nf_config: + self.registry_set.add(self.nf_config[registry]) + + # add depot.galaxyproject.org to the set, because it is the default registry for singularity hardcoded in modules + self.registry_set.add("depot.galaxyproject.org/singularity") + + # add community.wave.seqera.io/library to the set to support the new Seqera Docker container registry + self.registry_set.add("community.wave.seqera.io/library") + + # add chttps://community-cr-prod.seqera.io/docker/registry/v2/ to the set to support the new Seqera Singularity container registry + self.registry_set.add("community-cr-prod.seqera.io/docker/registry/v2") + + def get_container_output_dir(self) -> Path: + assert self.outdir is not None # mypy + return self.outdir / f"{self.container_system}-images" + + def download_container_images(self, current_revision: str = "") -> None: + """ + Fetch the container images with the appropriate ContainerFetcher + + Args: + current_revision (str): The current revision of the workflow. 
+ """ + + if len(self.containers) == 0: + log.info("No container names found in workflow") + else: + log.info( + f"Processing workflow revision {current_revision}, found {len(self.containers)} container image{'s' if len(self.containers) > 1 else ''} in total." + ) + log.debug(f"Container names: {self.containers}") + + out_path_dir = self.get_container_output_dir().absolute() + + # Check that the directories exist + if not out_path_dir.is_dir(): + log.debug(f"Output directory not found, creating: {out_path_dir}") + out_path_dir.mkdir(parents=True) + + if self.container_fetcher is not None: + self.container_fetcher.fetch_containers( + self.containers, + self.containers_remote, + ) + + def compress_download(self) -> None: + """Take the downloaded files and make a compressed .tar.gz archive.""" + log.debug(f"Creating archive: {self.output_filename}") + + # .tar.gz and .tar.bz2 files + if self.compress_type in ["tar.gz", "tar.bz2"]: + ctype_and_mode: dict[str, tuple[Literal["gz", "bz2"], Literal["w:gz", "w:bz2"]]] = { + "tar.gz": ("gz", "w:gz"), + "tar.bz2": ("bz2", "w:bz2"), + } # This ugly thing is required for typing + ctype, mode = ctype_and_mode[self.compress_type] + with tarfile.open(self.output_filename, mode) as tar: + tar.add(self.outdir, arcname=self.outdir.name) + tar_flags = "xzf" if ctype == "gz" else "xjf" + log.info(f"Command to extract files: [bright_magenta]tar -{tar_flags} {self.output_filename}[/]") + + # .zip files + if self.compress_type == "zip": + assert self.output_filename is not None # mypy + with ZipFile(self.output_filename, "w") as zip_file: + # Iterate over all the files in directory + for folder_name, _, filenames in os.walk(self.outdir): + for filename in filenames: + # create complete filepath of file in directory + file_path = Path(folder_name) / filename + # Add file to zip + zip_file.write(file_path) + log.info(f"Command to extract files: [bright_magenta]unzip {self.output_filename}[/]") + + # Delete original files + log.debug(f"Deleting uncompressed files: '{self.outdir}'") + shutil.rmtree(self.outdir) + + # Calculate md5sum for output file + log.info(f"MD5 checksum for '{self.output_filename}': [blue]{nf_core.utils.file_md5(self.output_filename)}[/]") diff --git a/nf_core/pipelines/download/singularity.py b/nf_core/pipelines/download/singularity.py new file mode 100644 index 0000000000..679e88f469 --- /dev/null +++ b/nf_core/pipelines/download/singularity.py @@ -0,0 +1,867 @@ +import concurrent.futures +import enum +import io +import itertools +import logging +import os +import re +import shutil +import subprocess +from collections.abc import Iterable +from pathlib import Path +from typing import Callable, Optional + +import questionary +import requests +import requests_cache +import rich.progress + +import nf_core.utils +from nf_core.pipelines.download.container_fetcher import ContainerFetcher, ContainerProgress +from nf_core.pipelines.download.utils import DownloadError, intermediate_file, intermediate_file_no_creation + +log = logging.getLogger(__name__) +stderr = rich.console.Console( + stderr=True, + style="dim", + highlight=False, + force_terminal=nf_core.utils.rich_force_colors(), +) + +SINGULARITY_CACHE_DIR_ENV_VAR = "NXF_SINGULARITY_CACHEDIR" +SINGULARITY_LIBRARY_DIR_ENV_VAR = "NXF_SINGULARITY_LIBRARYDIR" + + +class SingularityProgress(ContainerProgress): + def get_task_types_and_columns(self): + task_types_and_columns = super().get_task_types_and_columns() + task_types_and_columns.update( + { + "download": ( + "[blue]{task.description}", + 
rich.progress.BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>3.1f}%", + "•", + rich.progress.DownloadColumn(), + "•", + rich.progress.TransferSpeedColumn(), + ), + "singularity_pull": ( + "[magenta]{task.description}", + "[blue]{task.fields[current_log]}", + rich.progress.BarColumn(bar_width=None), + ), + } + ) + return task_types_and_columns + + +class SingularityFetcher(ContainerFetcher): + """ + Fetcher for Singularity containers. + """ + + def __init__( + self, + outdir: Path, + container_library: Iterable[str], + registry_set: Iterable[str], + container_cache_utilisation=None, + container_cache_index=None, + parallel: int = 4, + hide_progress: bool = False, + ): + # Check if the env variable for the Singularity cache directory is set + has_cache_dir = os.environ.get(SINGULARITY_CACHE_DIR_ENV_VAR) is not None + if not has_cache_dir and stderr.is_interactive: + # Prompt for the creation of a Singularity cache directory + has_cache_dir = SingularityFetcher.prompt_singularity_cachedir_creation() + + if has_cache_dir and container_cache_utilisation is None: + # No choice regarding singularity cache has been made. + container_cache_utilisation = SingularityFetcher.prompt_singularity_cachedir_utilization() + + if container_cache_utilisation == "remote": + # If we have a remote cache, we need to read it + if container_cache_index is None and stderr.is_interactive: + container_cache_index = SingularityFetcher.prompt_singularity_cachedir_remote() + if container_cache_index is None: + # No remote cache specified + self.container_cache_index = None + self.container_cache_utilisation = "copy" + + # If we have remote containers, we need to read them + if container_cache_utilisation == "remote" and container_cache_index is not None: + try: + self.remote_containers = SingularityFetcher.read_remote_singularity_containers( + container_cache_index + ) + except (FileNotFoundError, LookupError) as e: + log.error( + f"[red]Issue with reading the specified remote ${SINGULARITY_CACHE_DIR_ENV_VAR} index:[/]\n{e}\n" + ) + if stderr.is_interactive and rich.prompt.Confirm.ask( + "[blue]Specify a new index file and try again?" + ): + container_cache_index = SingularityFetcher.prompt_singularity_cachedir_remote() + else: + log.info( + f"Proceeding without consideration of the remote ${SINGULARITY_CACHE_DIR_ENV_VAR} index." + ) + self.container_cache_index = None + if os.environ.get(SINGULARITY_CACHE_DIR_ENV_VAR): + container_cache_utilisation = "copy" # default to copy if possible, otherwise skip. 
+ else: + container_cache_utilisation = None + else: + log.warning("[red]No remote cache index specified, skipping remote container download.[/]") + + # Find out what the library directory is + library_dir = Path(path_str) if (path_str := os.environ.get(SINGULARITY_LIBRARY_DIR_ENV_VAR)) else None + if library_dir and not library_dir.is_dir(): + # Since the library is read-only, if the directory isn't there, we can forget about it + library_dir = None + + # Find out what the cache directory is + cache_dir = Path(path_str) if (path_str := os.environ.get(SINGULARITY_CACHE_DIR_ENV_VAR)) else None + log.debug(f"{SINGULARITY_CACHE_DIR_ENV_VAR}: {cache_dir}") + + if container_cache_utilisation in ["amend", "copy"]: + if cache_dir: + if not cache_dir.is_dir(): + log.debug(f"Cache directory not found, creating: {cache_dir}") + cache_dir.mkdir() + else: + raise FileNotFoundError(f"Singularity cache is required but no '{SINGULARITY_CACHE_DIR_ENV_VAR}' set!") + + container_output_dir = outdir / "singularity-images" + super().__init__( + container_output_dir=container_output_dir, + container_library=container_library, + registry_set=registry_set, + progress_factory=SingularityProgress, + cache_dir=cache_dir, + library_dir=library_dir, + amend_cachedir=container_cache_utilisation == "amend", + parallel=parallel, + hide_progress=hide_progress, + ) + + def check_and_set_implementation(self) -> None: + """ + Check if Singularity/Apptainer is installed and set the implementation. + + Raises: + OSError: If Singularity/Apptainer is not installed or not in $PATH + """ + if shutil.which("singularity"): + self.implementation = "singularity" + elif shutil.which("apptainer"): + self.implementation = "apptainer" + else: + raise OSError("Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH") + + def get_cache_dir(self) -> Path: + """ + Get the cache Singularity cache directory + + Returns: + Path: The cache directory + + Raises: + FileNotFoundError: If the cache directory is not set + """ + cache_dir = os.environ.get(SINGULARITY_CACHE_DIR_ENV_VAR) + if cache_dir is not None: + return Path(cache_dir) + else: + raise FileNotFoundError(f"Singularity cache is required but no '{SINGULARITY_CACHE_DIR_ENV_VAR}' set!") + + def make_cache_dir(self) -> None: + """ + Make the cache directory + """ + cache_dir = self.get_cache_dir() + if not cache_dir.is_dir(): + log.debug(f"Cache directory not found, creating: {cache_dir}") + cache_dir.mkdir() + + @staticmethod + def prompt_singularity_cachedir_creation() -> bool: + """Prompt about using singularity cache directory if not already set""" + stderr.print( + f"\nNextflow and nf-core can use an environment variable called [blue]${SINGULARITY_CACHE_DIR_ENV_VAR}[/] that is a path to a directory where remote Singularity images are stored. " + f"This allows downloaded images to be cached in a central location." + ) + if rich.prompt.Confirm.ask( + f"[blue bold]?[/] [bold]Define [blue not bold]${SINGULARITY_CACHE_DIR_ENV_VAR}[/] for a shared Singularity image download folder?[/]" + ): + cachedir_path = SingularityFetcher.prompt_singularity_cachedir_path() + if cachedir_path is None: + raise DownloadError(f"No {SINGULARITY_CACHE_DIR_ENV_VAR} specified, cannot continue.") + + os.environ[SINGULARITY_CACHE_DIR_ENV_VAR] = str(cachedir_path) + + # Optionally, create a permanent entry for the singularity cache directory in the terminal profile. 
+ SingularityFetcher.prompt_singularity_cachedir_shellprofile_append(cachedir_path)
+
+ return True
+
+ return False
+
+ @staticmethod
+ def prompt_singularity_cachedir_path() -> Optional[Path]:
+ """Prompt for the name of the Singularity cache directory"""
+ # Prompt user for a cache directory path
+ cachedir_path = None
+ while cachedir_path is None:
+ prompt_cachedir_path = questionary.path(
+ "Specify the path:",
+ only_directories=True,
+ style=nf_core.utils.nfcore_question_style,
+ ).unsafe_ask()
+ if prompt_cachedir_path == "":
+ log.error(f"Not using [blue]${SINGULARITY_CACHE_DIR_ENV_VAR}[/]")
+ return None
+ cachedir_path = Path(prompt_cachedir_path).expanduser().absolute()
+ if not cachedir_path.is_dir():
+ log.error(f"'{cachedir_path}' is not a directory.")
+ cachedir_path = None
+ return cachedir_path
+
+ @staticmethod
+ def prompt_singularity_cachedir_shellprofile_append(cachedir_path: Path) -> None:
+ """
+ Prompt about appending the Singularity cache directory to the shell profile
+
+ Currently supports bash, zsh and sh.
+ ToDo: "dash", "ash", "csh", "tcsh", "ksh", "fish", "cmd", "powershell", "pwsh"?
+ """
+ shells_profile_paths = {
+ "bash": [Path("~/.bash_profile"), Path("~/.bashrc")],
+ "zsh": [Path("~/.zprofile"), Path("~/.zshenv")],
+ "sh": [Path("~/.profile")],
+ }
+ shell = Path(os.getenv("SHELL", "")).name
+ shellprofile_paths = shells_profile_paths.get(shell, [Path("~/.profile")])
+ shellprofile_path: Optional[Path] = None
+ for profile_path in shellprofile_paths:
+ if profile_path.expanduser().is_file():
+ shellprofile_path = profile_path
+ break
+
+ if shellprofile_path is not None:
+ stderr.print(
+ f"\nSo that [blue]${SINGULARITY_CACHE_DIR_ENV_VAR}[/] is always defined, you can add it to your [blue not bold]~/{shellprofile_path.name}[/] file. "
+ "This will then be automatically set every time you open a new terminal. We can add the following line to this file for you: \n"
+ f'[blue]export {SINGULARITY_CACHE_DIR_ENV_VAR}="{cachedir_path}"[/]'
+ )
+ append_to_file = rich.prompt.Confirm.ask(
+ f"[blue bold]?[/] [bold]Add to [blue not bold]~/{shellprofile_path.name}[/] ?[/]"
+ )
+ if append_to_file:
+ with open(shellprofile_path.expanduser(), "a") as f:
+ f.write(
+ "\n\n#######################################\n"
+ f"## Added by `nf-core pipelines download` v{nf_core.__version__} ##\n"
+ + f'export {SINGULARITY_CACHE_DIR_ENV_VAR}="{cachedir_path}"'
+ + "\n#######################################\n"
+ )
+ log.info(f"Successfully wrote to [blue]{shellprofile_path}[/]")
+ log.warning("You will need to reload your terminal after the download completes for this to take effect.")
+ else:
+ log.debug(f"No shell profile found for {shell}.")
+
+ @staticmethod
+ def prompt_singularity_cachedir_utilization() -> str:
+ """Ask if we should *only* use the singularity cache directory without copying into the target"""
+ stderr.print(
+ "\nIf you are working on the same system where you will run Nextflow, you can amend the downloaded images to the ones in the "
+ f"[blue not bold]${SINGULARITY_CACHE_DIR_ENV_VAR}[/] folder, and Nextflow will automatically find them. "
+ "However, if you will transfer the downloaded files to a different system, then they should be copied to the target folder."
+ ) + return questionary.select( + f"Copy singularity images from {SINGULARITY_CACHE_DIR_ENV_VAR} to the target folder or amend new images to the cache?", + choices=["amend", "copy"], + style=nf_core.utils.nfcore_question_style, + ).unsafe_ask() + + @staticmethod + def prompt_singularity_cachedir_remote() -> Optional[Path]: + """Prompt about the index of a remote singularity cache directory""" + # Prompt user for a file listing the contents of the remote cache directory + cachedir_index = None + while cachedir_index is None: + prompt_cachedir_index = questionary.path( + "Specify a list of the container images that are already present on the remote system:", + validate=nf_core.utils.SingularityCacheFilePathValidator, + style=nf_core.utils.nfcore_question_style, + ).unsafe_ask() + if prompt_cachedir_index == "": + log.error(f"Will disregard contents of a remote [blue]${SINGULARITY_CACHE_DIR_ENV_VAR}[/]") + return None + cachedir_index = Path(prompt_cachedir_index).expanduser().absolute() + if not os.access(cachedir_index, os.R_OK): + log.error(f"'{cachedir_index}' is not a readable file.") + cachedir_index = None + + return cachedir_index + + @staticmethod + def read_remote_singularity_containers(container_cache_index: Path) -> list[str]: + """ + Reads the file specified as index for the remote Singularity cache dir + + Args: + container_cache_index (Path): The path to the index file + + Returns: + list[str]: A list of container names + + Raises: + LookupError: If no valid container names are found in the index file + """ + n_total_images = 0 + containers_remote = [] + with open(container_cache_index) as indexfile: + for line in indexfile.readlines(): + match = re.search(r"([^\/\\]+\.img)", line, re.S) + if match: + n_total_images += 1 + containers_remote.append(match.group(0)) + if n_total_images == 0: + raise LookupError("Could not find valid container names in the index file.") + containers_remote = sorted(list(set(containers_remote))) + log.debug(containers_remote) + return containers_remote + + def clean_container_file_extension(self, container_fn: str) -> str: + """ + This makes sure that the Singularity container filename has the right file extension + """ + # Detect file extension + extension = ".img" + if ".sif:" in container_fn: + extension = ".sif" + container_fn = container_fn.replace(".sif:", "-") + elif container_fn.endswith(".sif"): + extension = ".sif" + container_fn = container_fn.replace(".sif", "") + + # Strip : and / characters + container_fn = container_fn.replace("/", "-").replace(":", "-") + # Add file extension + container_fn = container_fn + extension + return container_fn + + def fetch_remote_containers(self, containers: list[tuple[str, Path]], parallel: int = 4) -> None: + """ + Fetch a set of remote container images. + + This is the main entry point for the subclass, and is called by + the `fetch_containers` method in the superclass. + + Args: + containers (list[tuple[str, Path]]): A list of container names and output paths. + parallel (int): The number of containers to fetch in parallel. 
+ """ + # Split the list of containers depending on whether we want to pull them or download them + containers_pull = [] + containers_download = [] + for container, out_path in containers: + # If the container is a remote image, we pull it + if container.startswith("http"): + containers_download.append((container, out_path)) + else: + containers_pull.append((container, out_path)) + + log.debug(containers) + if containers_pull: + # We only need to set the implementation if we are pulling images + # -- a user could download images without having singularity/apptainer installed + self.check_and_set_implementation() + self.progress.update_remote_fetch_task(description="Pulling singularity images") + self.pull_images(containers_pull) + + if containers_download: + self.progress.update_remote_fetch_task(description="Downloading singularity images") + self.download_images(containers_download, parallel_downloads=parallel) + + def symlink_registries(self, image_path: Path) -> None: + """Create a symlink for each registry in the registry set that points to the image. + + The base image, e.g. ./nf-core-gatk-4.4.0.0.img will thus be symlinked as for example ./quay.io-nf-core-gatk-4.4.0.0.img + by prepending each registry in `registries` to the image name. + + Unfortunately, the output image name may contain a registry definition (Singularity image pulled from depot.galaxyproject.org + or older pipeline version, where the docker registry was part of the image name in the modules). Hence, it must be stripped + before to ensure that it is really the base name. + """ + + # Create a regex pattern from the set, in case trimming is needed. + trim_pattern = "|".join(f"^{re.escape(registry)}-?".replace("/", "[/-]") for registry in self.registry_set) + + for registry in self.registry_set: + # Nextflow will convert it like this as well, so we need it mimic its behavior + registry = registry.replace("/", "-") + + if not bool(re.search(trim_pattern, image_path.name)): + symlink_name = Path("./", f"{registry}-{image_path.name}") + else: + trimmed_name = re.sub(f"{trim_pattern}", "", image_path.name) + symlink_name = Path("./", f"{registry}-{trimmed_name}") + + symlink_full = Path(image_path.parent, symlink_name) + target_name = Path("./", image_path.name) + + if not symlink_full.exists() and target_name != symlink_name: + symlink_full.parent.mkdir(exist_ok=True) + image_dir = os.open(image_path.parent, os.O_RDONLY) + try: + os.symlink( + target_name, + symlink_name, + dir_fd=image_dir, + ) + log.debug(f"Symlinked {target_name} as {symlink_name}.") + finally: + os.close(image_dir) + + def copy_image(self, container: str, src_path: Path, dest_path: Path): + super().copy_image(container, src_path, dest_path) + # For Singularity we need to create symlinks to ensure that the + # images are found even with different registries being used. 
+ self.symlink_registries(dest_path) + + def download_images( + self, + containers_download: Iterable[tuple[str, Path]], + parallel_downloads: int, + ) -> None: + downloader = FileDownloader(self.progress) + + def update_file_progress(input_params: tuple[str, Path], status: FileDownloader.Status) -> None: + # try-except introduced in 4a95a5b84e2becbb757ce91eee529aa5f8181ec7 + # unclear why rich.progress may raise an exception here as it's supposed to be thread-safe + container, output_path = input_params + try: + self.progress.advance_remote_fetch_task() + except Exception as e: + log.error(f"Error updating progress bar: {e}") + + if status == FileDownloader.Status.DONE: + self.symlink_registries(output_path) + + downloader.download_files_in_parallel(containers_download, parallel_downloads, callback=update_file_progress) + + def pull_images(self, containers_pull: Iterable[tuple[str, Path]]) -> None: + """ + Pull a set of container images using `singularity pull`. + + Args: + containers_pull (Iterable[tuple[str, Path]]): A list of container names and output paths. + + """ + for container, output_path in containers_pull: + # it is possible to try multiple registries / mirrors if multiple were specified. + # Iteration happens over a copy of self.container_library[:], as I want to be able to remove failing registries for subsequent images. + for library in self.container_library[:]: + try: + self.pull_image(container, output_path, library) + # Pulling the image was successful, no SingularityError was raised, break the library loop + break + + except SingularityError.RegistryNotFoundError as e: + self.container_library.remove(library) + # The only library was removed + if not self.container_library: + log.error(e.message) + log.error(e.helpmessage) + raise OSError from e + else: + # Other libraries can be used + continue + except SingularityError.ImageNotFoundError as e: + # Try other registries + if e.error_log.absolute_URI: + break # there no point in trying other registries if absolute URI was specified. + else: + continue + except SingularityError.InvalidTagError: + # Try other registries + continue + except SingularityError.OtherError as e: + # Try other registries + log.error(e.message) + log.error(e.helpmessage) + if e.error_log.absolute_URI: + break # there no point in trying other registries if absolute URI was specified. + else: + continue + else: + # The else clause executes after the loop completes normally. + # This means the library loop completed without breaking, indicating failure for all libraries (registries) + log.error( + f"Not able to pull image of {container}. Service might be down or internet connection is dead." + ) + # Task should advance in any case. Failure to pull will not kill the download process. + self.progress.update_remote_fetch_task(advance=1) + + def construct_pull_command(self, output_path: Path, address: str): + singularity_command = [self.implementation, "pull", "--name", str(output_path), address] + return singularity_command + + def get_address(self, container, library) -> tuple[str, bool]: + # Sometimes, container still contain an explicit library specification, which + # resulted in attempted pulls e.g. from docker://quay.io/quay.io/qiime2/core:2022.11 + # Thus, if an explicit registry is specified, the provided -l value is ignored. + # Additionally, check if the container to be pulled is native Singularity: oras:// protocol. 
+ container_parts = container.split("/") + if len(container_parts) > 2: + address = container if container.startswith("oras://") else f"docker://{container}" + absolute_URI = True + else: + address = f"docker://{library}/{container.replace('docker://', '')}" + absolute_URI = False + + return address, absolute_URI + + def pull_image(self, container: str, output_path: Path, library: str) -> bool: + """ + Pull a singularity image using `singularity pull`. + + Args: + container (str): A pipeline's container name. Usually it is of similar format + to ``nfcore/name:version``. + library (list of str): A list of libraries to try for pulling the image. + + Raises: + Various exceptions possible from `subprocess` execution of Singularity. + """ + + address, absolute_URI = self.get_address(container, library) + + # Check if the image already exists in the output directory + # Since we are using a temporary file, need to do this explicitly. + if output_path.exists(): + log.debug(f"Image {container} already exists at {output_path}, skipping pull.") + return False + + with self.progress.sub_task( + container, + start=False, + total=False, + progress_type="singularity_pull", + current_log="", + ) as task: + with intermediate_file_no_creation(output_path) as output_path_tmp: + singularity_command = self.construct_pull_command(output_path_tmp, address) + log.debug(f"Building singularity image: {address}") + # Run the singularity pull command + with subprocess.Popen( + singularity_command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True, + bufsize=1, + ) as proc: + lines = [] + if proc.stdout is not None: + for line in proc.stdout: + lines.append(line) + self.progress.update(task, current_log=line.strip()) + + if lines: + # something went wrong with the container retrieval + log.debug(f"Singularity pull output: {lines}") + if any("FATAL: " in line for line in lines): + raise SingularityError( + container=container, + registry=library, + address=address, + absolute_URI=absolute_URI, + out_path=output_path, + command=singularity_command, + error_msg=lines, + ) + + self.symlink_registries(output_path) + return True + + +# Distinct errors for the Singularity container download, required for acting on the exceptions +class SingularityError(Exception): + """A class of errors related to pulling containers with Singularity/Apptainer""" + + def __init__( + self, + container, + registry, + address, + absolute_URI, + out_path, + command, + error_msg, + ): + self.container = container + self.registry = registry + self.address = address + self.absolute_URI = absolute_URI + self.out_path = out_path + self.command = command + self.error_msg = error_msg + self.patterns = [] + + error_patterns = { + # The registry does not resolve to a valid IP address + r"dial\stcp.*no\ssuch\shost": self.RegistryNotFoundError, + # + # Unfortunately, every registry seems to return an individual error here: + # Docker.io: denied: requested access to the resource is denied + # unauthorized: authentication required + # Quay.io: StatusCode: 404, \n'] + # ghcr.io: Requesting bearer token: invalid status code from registry 400 (Bad Request) + # + r"requested\saccess\sto\sthe\sresource\sis\sdenied": self.ImageNotFoundError, # Docker.io + r"StatusCode:\s404": self.ImageNotFoundError, # Quay.io + r"invalid\sstatus\scode\sfrom\sregistry\s400": self.ImageNotFoundError, # ghcr.io + r"400|Bad\s?Request": self.ImageNotFoundError, # ghcr.io + # The image and registry are valid, but the (version) tag is not + 
r"manifest\sunknown": self.InvalidTagError, + # The container image is no native Singularity Image Format. + r"ORAS\sSIF\simage\sshould\shave\sa\ssingle\slayer": self.NoSingularityContainerError, + } + # Loop through the error messages and patterns. Since we want to have the option of + # no matches at all, we use itertools.product to allow for the use of the for ... else construct. + for line, (pattern, error_class) in itertools.product(error_msg, error_patterns.items()): + if re.search(pattern, line): + self.error_type = error_class(self) + break + else: + self.error_type = self.OtherError(self) + + log.error(self.error_type.message) + log.info(self.error_type.helpmessage) + log.debug(f"Failed command:\n{' '.join(command)}") + log.debug(f"Singularity error messages:\n{''.join(error_msg)}") + + raise self.error_type + + class RegistryNotFoundError(ConnectionRefusedError): + """The specified registry does not resolve to a valid IP address""" + + def __init__(self, error_log): + self.error_log = error_log + self.message = ( + f'[bold red]The specified container library "{self.error_log.registry}" is invalid or unreachable.[/]\n' + ) + self.helpmessage = ( + f'Please check, if you made a typo when providing "-l / --library {self.error_log.registry}"\n' + ) + super().__init__(self.message, self.helpmessage, self.error_log) + + class ImageNotFoundError(FileNotFoundError): + """The image can not be found in the registry""" + + def __init__(self, error_log): + self.error_log = error_log + if not self.error_log.absolute_URI: + self.message = ( + f'[bold red]"Pulling "{self.error_log.container}" from "{self.error_log.address}" failed.[/]\n' + ) + self.helpmessage = f'Saving image of "{self.error_log.container}" failed.\nPlease troubleshoot the command \n"{" ".join(self.error_log.command)}" manually.f\n' + else: + self.message = f'[bold red]"The pipeline requested the download of non-existing container image "{self.error_log.address}"[/]\n' + self.helpmessage = ( + f'Please try to rerun \n"{" ".join(self.error_log.command)}" manually with a different registry.f\n' + ) + + super().__init__(self.message) + + class InvalidTagError(AttributeError): + """Image and registry are valid, but the (version) tag is not""" + + def __init__(self, error_log): + self.error_log = error_log + self.message = f'[bold red]"{self.error_log.address.split(":")[-1]}" is not a valid tag of "{self.error_log.container}"[/]\n' + self.helpmessage = f'Please chose a different library than {self.error_log.registry}\nor try to locate the "{self.error_log.address.split(":")[-1]}" version of "{self.error_log.container}" manually.\nPlease troubleshoot the command \n"{" ".join(self.error_log.command)}" manually.\n' + super().__init__(self.message) + + class NoSingularityContainerError(RuntimeError): + """The container image is no native Singularity Image Format.""" + + def __init__(self, error_log): + self.error_log = error_log + self.message = ( + f'[bold red]"{self.error_log.container}" is no valid Singularity Image Format container.[/]\n' + ) + self.helpmessage = f"Pulling \"{self.error_log.container}\" failed, because it appears invalid. 
To convert from Docker's OCI format, prefix the URI with 'docker://' instead of 'oras://'.\n" + super().__init__(self.message) + + class OtherError(RuntimeError): + """Undefined error with the container""" + + def __init__(self, error_log): + self.error_log = error_log + if not self.error_log.absolute_URI: + self.message = f'[bold red]"{self.error_log.container}" failed for unclear reasons.[/]\n' + self.helpmessage = f'Pulling of "{self.error_log.container}" failed.\nPlease troubleshoot the command \n"{" ".join(self.error_log.command)}" manually.\n' + else: + self.message = f'[bold red]"The pipeline requested the download of non-existing container image "{self.error_log.address}"[/]\n' + self.helpmessage = ( + f'Please try to rerun \n"{" ".join(self.error_log.command)}" manually with a different registry.f\n' + ) + + super().__init__(self.message, self.helpmessage, self.error_log) + + +class FileDownloader: + """Class to download files. + + Downloads are done in parallel using threads. Progress of each download + is shown in a progress bar. + + Users can hook a callback method to be notified after each download. + """ + + # Enum to report the status of a download thread + Status = enum.Enum("Status", "CANCELLED PENDING RUNNING DONE ERROR") + + def __init__(self, progress: ContainerProgress) -> None: + """Initialise the FileDownloader object. + + Args: + progress (DownloadProgress): The progress bar object to use for tracking downloads. + """ + self.progress = progress + self.kill_with_fire = False + + def parse_future_status(self, future: concurrent.futures.Future) -> Status: + """Parse the status of a future object.""" + if future.running(): + return self.Status.RUNNING + if future.cancelled(): + return self.Status.CANCELLED + if future.done(): + if future.exception(): + return self.Status.ERROR + return self.Status.DONE + return self.Status.PENDING + + def nice_name(self, remote_path: str) -> str: + # The final part of a singularity image is a data directory, which is not very informative + # so we use the second to last part which is a hash + parts = remote_path.split("/") + if parts[-1] == "data": + return parts[-2][:50] + else: + return parts[-1][:50] + + def download_files_in_parallel( + self, + download_files: Iterable[tuple[str, Path]], + parallel_downloads: int, + callback: Optional[Callable[[tuple[str, Path], Status], None]] = None, + ) -> list[tuple[str, Path]]: + """Download multiple files in parallel. + + Args: + download_files (Iterable[tuple[str, str]]): list of tuples with the remote URL and the local output path. + parallel_downloads (int): Number of parallel downloads to run. + callback (Callable[[tuple[str, str], Status], None]): Optional allback function to call after each download. + The function must take two arguments: the download tuple and the status of the download thread. + """ + + # Make ctrl-c work with multi-threading + self.kill_with_fire = False + + # Track the download threads + future_downloads: dict[concurrent.futures.Future, tuple[str, Path]] = {} + + # list to store *successful* downloads + successful_downloads = [] + + def successful_download_callback(future: concurrent.futures.Future) -> None: + if future.done() and not future.cancelled() and future.exception() is None: + successful_downloads.append(future_downloads[future]) + + with concurrent.futures.ThreadPoolExecutor(max_workers=parallel_downloads) as pool: + # The entire block needs to be monitored for KeyboardInterrupt so that ntermediate files + # can be cleaned up properly. 
+ try: + for input_params in download_files: + (remote_path, output_path) = input_params + # Create the download thread as a Future object + future = pool.submit(self.download_file, remote_path, output_path) + future_downloads[future] = input_params + # Callback to record successful downloads + future.add_done_callback(successful_download_callback) + # User callback function (if provided) + if callback: + future.add_done_callback(lambda f: callback(future_downloads[f], self.parse_future_status(f))) + + completed_futures = concurrent.futures.wait( + future_downloads, return_when=concurrent.futures.ALL_COMPLETED + ) + # Get all the exceptions and exclude BaseException-based ones (e.g. KeyboardInterrupt) + exceptions = [ + exc for exc in (f.exception() for f in completed_futures.done) if isinstance(exc, Exception) + ] + if exceptions: + raise DownloadError("Download errors", exceptions) + + except KeyboardInterrupt: + # Cancel the future threads that haven't started yet + for future in future_downloads: + future.cancel() + # Set the variable that the threaded function looks for + # Will trigger an exception from each active thread + self.kill_with_fire = True + # Re-raise exception on the main thread + raise + + return successful_downloads + + def download_file(self, remote_path: str, output_path: Path) -> None: + """Download a file from the web. + + Use native Python to download the file. Progress is shown in the progress bar + as a new task (of type "download"). + + This method is integrated with the above `download_files_in_parallel` method. The + `self.kill_with_fire` variable is a sentinel used to check if the user has hit ctrl-c. + + Args: + remote_path (str): Source URL of the file to download + output_path (str): The target output path + """ + log.debug(f"Downloading '{remote_path}' to '{output_path}'") + + # Set up download progress bar as a new task + nice_name = self.nice_name(remote_path) + + with self.progress.sub_task(nice_name, start=False, total=False, progress_type="download") as task: + # Open file handle and download + # This temporary will be automatically renamed to the target if there are no errors + with intermediate_file(output_path) as fh: + # Disable caching as this breaks streamed downloads + with requests_cache.disabled(): + r = requests.get(remote_path, allow_redirects=True, stream=True, timeout=60 * 5) + filesize = r.headers.get("Content-length") + if filesize: + self.progress.update(task, total=int(filesize)) + self.progress.start_task(task) + + # Stream download + has_content = False + for data in r.iter_content(chunk_size=io.DEFAULT_BUFFER_SIZE): + # Check that the user didn't hit ctrl-c + if self.kill_with_fire: + raise KeyboardInterrupt + self.progress.update(task, advance=len(data)) + fh.write(data) + has_content = True + + # Check that we actually downloaded something + if not has_content: + raise DownloadError(f"Downloaded file '{remote_path}' is empty") diff --git a/nf_core/pipelines/download/utils.py b/nf_core/pipelines/download/utils.py new file mode 100644 index 0000000000..10133408b7 --- /dev/null +++ b/nf_core/pipelines/download/utils.py @@ -0,0 +1,62 @@ +import contextlib +import logging +import tempfile +from collections.abc import Generator +from pathlib import Path + +log = logging.getLogger(__name__) + + +class DownloadError(RuntimeError): + """A custom exception that is raised when nf-core pipelines download encounters a problem that we already took into consideration. 
+ In this case, we do not want to print the traceback, but give the user some concise, helpful feedback instead.
+ """
+
+
+@contextlib.contextmanager
+def intermediate_file(output_path: Path) -> Generator[tempfile._TemporaryFileWrapper, None, None]:
+ """Context manager to help ensure the output file is either complete or non-existent.
+ It does that by creating a temporary file in the same directory as the output file,
+ letting the caller write to it, and then moving it to the final location.
+ If an exception is raised, the temporary file is deleted and the output file is not touched.
+ """
+ if output_path.is_dir():
+ raise DownloadError(f"Output path '{output_path}' is a directory")
+ if output_path.is_symlink():
+ raise DownloadError(f"Output path '{output_path}' is a symbolic link")
+
+ tmp = tempfile.NamedTemporaryFile(dir=output_path.parent, delete=False)
+ try:
+ yield tmp
+ tmp.close()
+ Path(tmp.name).rename(output_path)
+ except:
+ tmp_path = Path(tmp.name)
+ if tmp_path.exists():
+ tmp_path.unlink()
+ raise
+
+
+@contextlib.contextmanager
+def intermediate_file_no_creation(output_path: Path) -> Generator[Path, None, None]:
+ """
+ Context manager to help ensure the output file is either complete or non-existent.
+
+ 'singularity/apptainer pull' requires that the output file does not exist before it is run.
+ For pulling containers we therefore create a temporary directory and write to a file named
+ 'tempfile' in it. If the pull command is successful, we rename the temporary file to the output path.
+ """
+ if output_path.is_dir():
+ raise DownloadError(f"Output path '{output_path}' is a directory")
+ if output_path.is_symlink():
+ raise DownloadError(f"Output path '{output_path}' is a symbolic link")
+
+ tmp = tempfile.TemporaryDirectory(dir=output_path.parent)
+ tmp_fn = Path(tmp.name) / "tempfile"
+ try:
+ yield tmp_fn
+ Path(tmp.name).rename(output_path)
+ tmp.cleanup()
+ except:
+ tmp.cleanup()
+ raise
diff --git a/nf_core/pipelines/download/workflow_repo.py b/nf_core/pipelines/download/workflow_repo.py
new file mode 100644
index 0000000000..184555ac19
--- /dev/null
+++ b/nf_core/pipelines/download/workflow_repo.py
@@ -0,0 +1,300 @@
+import logging
+import os
+import re
+import shutil
+from pathlib import Path
+
+import git
+import rich
+from git.exc import GitCommandError, InvalidGitRepositoryError
+from packaging.version import Version
+
+import nf_core
+import nf_core.modules.modules_utils
+from nf_core.pipelines.download.utils import DownloadError
+from nf_core.synced_repo import RemoteProgressbar, SyncedRepo
+from nf_core.utils import (
+ NFCORE_CACHE_DIR,
+ NFCORE_DIR,
+)
+
+log = logging.getLogger(__name__)
+
+
+class WorkflowRepo(SyncedRepo):
+ """
+ An object to store details about a locally cached workflow repository.
+
+ Important Attributes:
+ fullname: The full name of the repository, ``nf-core/{self.pipelinename}``.
+ local_repo_dir (str): The local directory where the workflow is cloned into. Defaults to ``$HOME/.cache/nf-core/nf-core/{self.pipeline}``.
+
+ """
+
+ def __init__(
+ self,
+ remote_url,
+ revision,
+ commit,
+ additional_tags,
+ location=None,
+ hide_progress=False,
+ in_cache=True,
+ ):
+ """
+ Initializes the object and clones the workflow's git repository if it is not already present
+
+ Args:
+ remote_url (str): The URL of the remote repository.
+ revision (list of str): The revisions to include.
+ commit (list of str): The commit checksums linked with the revisions.
+            additional_tags (list of str): Additional synonymous tags to apply, each given as ``anchor=tag``.
+            location (Path, optional): Directory in which the local clone is created/cached. Defaults to None, in which case the standard nf-core cache directory is used.
+            hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False.
+            in_cache (bool, optional): Whether to clone the repository into the cache directory. Defaults to True.
+        """
+        self.remote_url = remote_url
+        if isinstance(revision, str):
+            self.revision = [revision]
+        elif isinstance(revision, list):
+            self.revision = [*revision]
+        else:
+            self.revision = []
+        if isinstance(commit, str):
+            self.commit = [commit]
+        elif isinstance(commit, list):
+            self.commit = [*commit]
+        else:
+            self.commit = []
+        self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url)
+        self.retries = 0  # retries for setting up the locally cached repository
+        self.hide_progress = hide_progress
+
+        self.setup_local_repo(remote=remote_url, location=location, in_cache=in_cache)
+
+        # additional tags to be added to the repository
+        self.additional_tags = additional_tags if additional_tags else None
+
+    def __repr__(self):
+        """Called by print, creates representation of object"""
+        return f"<Locally cached repository: {self.fullname}, revisions {', '.join(self.revision)}, cached at: {self.local_repo_dir}>"
+
+    @property
+    def heads(self):
+        return self.repo.heads
+
+    @property
+    def tags(self):
+        return self.repo.tags
+
+    def access(self):
+        if self.local_repo_dir.exists():
+            return self.local_repo_dir
+        else:
+            return None
+
+    def checkout(self, commit):
+        return super().checkout(commit)
+
+    def get_remote_branches(self, remote_url):
+        return super().get_remote_branches(remote_url)
+
+    def retry_setup_local_repo(self, skip_confirm=False):
+        self.retries += 1
+        if skip_confirm or rich.prompt.Confirm.ask(
+            f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"
+        ):
+            if (
+                self.retries > 1
+            ):  # One unconfirmed retry is acceptable, but prevent infinite loops without user interaction.
+                raise DownloadError(
+                    f"Errors with locally cached repository of '{self.fullname}'. Please delete '{self.local_repo_dir}' manually and try again."
+                )
+            if not skip_confirm:  # Feedback to user for manual confirmation.
+                log.info(f"Removing '{self.local_repo_dir}'")
+            shutil.rmtree(self.local_repo_dir)
+            self.setup_local_repo(self.remote_url, in_cache=False)
+        else:
+            raise DownloadError("Exiting due to error with locally cached Git repository.")
+
+    def setup_local_repo(self, remote, location=None, in_cache=True):
+        """
+        Sets up the local git repository. If the repository has been cloned previously, it
+        returns a git.Repo object of that clone. Otherwise it tries to clone the repository from
+        the provided remote URL and returns a git.Repo of the new clone.
+
+        Args:
+            remote (str): git url of remote
+            location (Path): location where the clone should be created/cached.
+            in_cache (bool, optional): Whether to clone the repository into the cache directory. Defaults to True.
+ Sets self.repo + """ + if location: + self.local_repo_dir = location / self.fullname + else: + self.local_repo_dir = Path(NFCORE_DIR) if not in_cache else Path(NFCORE_CACHE_DIR, self.fullname) + + try: + if not self.local_repo_dir.exists(): + try: + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + disable=os.environ.get("HIDE_PROGRESS", None) is not None or self.hide_progress, + ) + with pbar: + self.repo = git.Repo.clone_from( + remote, + self.local_repo_dir, + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), + ) + super().update_local_repo_status(self.fullname, True) + except GitCommandError: + raise DownloadError(f"Failed to clone from the remote: `{remote}`") + else: + self.repo = git.Repo(self.local_repo_dir) + + if super().no_pull_global: + super().update_local_repo_status(self.fullname, True) + # If the repo is already cloned, fetch the latest changes from the remote + if not super().local_repo_synced(self.fullname): + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + disable=os.environ.get("HIDE_PROGRESS", None) is not None or self.hide_progress, + ) + with pbar: + self.repo.remotes.origin.fetch( + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") + ) + super().update_local_repo_status(self.fullname, True) + + except (GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") + self.retry_setup_local_repo() + + def tidy_tags_and_branches(self): + """ + Function to delete all tags and branches that are not of interest to the downloader. + This allows a clutter-free experience in Seqera Platform. The untagged commits are evidently still available. + + However, due to local caching, the downloader might also want access to revisions that had been deleted before. + In that case, don't bother with re-adding the tags and rather download anew from Github. + """ + if self.revision and self.repo and self.repo.tags: + # create a set to keep track of the revisions to process & check + desired_revisions = set(self.revision) + + # determine what needs pruning + tags_to_remove = {tag for tag in self.repo.tags if tag.name not in desired_revisions.union({"latest"})} + heads_to_remove = {head for head in self.repo.heads if head.name not in desired_revisions.union({"latest"})} + + try: + # delete unwanted tags from repository + for tag in tags_to_remove: + self.repo.delete_tag(tag) + + # switch to a revision that should be kept, because deleting heads fails, if they are checked out (e.g. "main") + self.checkout(self.revision[0]) + + # delete unwanted heads/branches from repository + for head in heads_to_remove: + self.repo.delete_head(head) + + # ensure all desired revisions/branches are available + for revision in desired_revisions: + if not self.repo.is_valid_object(revision): + self.checkout(revision) + self.repo.create_head(revision, revision) + if self.repo.head.is_detached: + self.repo.head.reset(index=True, working_tree=True) + + # no branch exists, but one is required for Seqera Platform's UI to display revisions correctly). Thus, "latest" will be created. 
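+                # (e.g. a download restricted to the tags "1.0.0" and "1.1.0" leaves the pruned clone without any branches; "1.1.0" would then be aliased as "latest" below)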
+ if not bool(self.repo.heads): + if self.repo.is_valid_object("latest"): + # "latest" exists as tag but not as branch + self.repo.create_head("latest", "latest") # create a new head for latest + self.checkout("latest") + else: + # desired revisions may contain arbitrary branch names that do not correspond to valid semantic versioning patterns. + valid_versions = [ + Version(v) for v in desired_revisions if re.match(r"\d+\.\d+(?:\.\d+)*(?:[\w\-_])*", v) + ] + # valid versions sorted in ascending order, last will be aliased as "latest". + latest = sorted(valid_versions)[-1] + self.repo.create_head("latest", str(latest)) + self.checkout(latest) + if self.repo.head.is_detached: + self.repo.head.reset(index=True, working_tree=True) + + # Apply the custom additional tags to the repository + self.__add_additional_tags() + + # get all tags and available remote_branches + completed_revisions = {revision.name for revision in self.repo.heads + self.repo.tags} + + # verify that all requested revisions are available. + # a local cache might lack revisions that were deleted during a less comprehensive previous download. + if bool(desired_revisions - completed_revisions): + log.info( + f"Locally cached version of the pipeline lacks selected revisions {', '.join(desired_revisions - completed_revisions)}. Downloading anew from GitHub..." + ) + self.retry_setup_local_repo(skip_confirm=True) + self.tidy_tags_and_branches() + except (GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Adapting your pipeline download unfortunately failed:[/]\n{e}\n") + self.retry_setup_local_repo(skip_confirm=True) + raise DownloadError(e) from e + + # "Private" method to add the additional custom tags to the repository. + def __add_additional_tags(self) -> None: + if self.additional_tags: + # example.com is reserved by the Internet Assigned Numbers Authority (IANA) as special-use domain names for documentation purposes. + # Although "dev-null" is a syntactically-valid local-part that is equally valid for delivery, + # and only the receiving MTA can decide whether to accept it, it is to my best knowledge configured with + # a Postfix discard mail delivery agent (https://www.postfix.org/discard.8.html), so incoming mails should be sinkholed. + self.ensure_git_user_config( + f"nf-core pipelines download v{nf_core.__version__}", + "dev-null@example.com", + ) + + for additional_tag in self.additional_tags: + # A valid git branch or tag name can contain alphanumeric characters, underscores, hyphens, and dots. + # But it must not start with a dot, hyphen or underscore and also cannot contain two consecutive dots. + if re.match(r"^\w[\w_.-]+={1}\w[\w_.-]+$", additional_tag) and ".." not in additional_tag: + anchor, tag = additional_tag.split("=") + if self.repo.is_valid_object(anchor) and not self.repo.is_valid_object(tag): + try: + self.repo.create_tag( + tag, + ref=anchor, + message=f"Synonynmous tag to {anchor}; added by `nf-core pipelines download`.", + ) + except (GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Additional tag(s) could not be applied:[/]\n{e}\n") + else: + if not self.repo.is_valid_object(anchor): + log.error( + f"[red]Adding tag '{tag}' to '{anchor}' failed.[/]\n Mind that '{anchor}' must be a valid git reference that resolves to a commit." + ) + if self.repo.is_valid_object(tag): + log.error( + f"[red]Adding tag '{tag}' to '{anchor}' failed.[/]\n Mind that '{tag}' must not exist hitherto." 
+ ) + else: + log.error(f"[red]Could not apply invalid `--tag` specification[/]: '{additional_tag}'") + + def bare_clone(self, destination: Path): + if self.repo: + try: + destfolder = destination.parent.absolute() + if not destfolder.exists(): + destfolder.mkdir() + if destination.exists(): + shutil.rmtree(destination) + self.repo.clone(str(destfolder), bare=True) + except (OSError, GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Failure to create the pipeline download[/]\n{e}\n") diff --git a/nf_core/utils.py b/nf_core/utils.py index 435d4c2bd6..fe9ec413d7 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -266,6 +266,74 @@ def is_pipeline_directory(wf_path): raise UserWarning(warning) +# This is the minimal version of Nextflow required to fetch containers with `nextflow inspect` +NF_INSPECT_MIN_NF_VERSION = (25, 4, 4, False) + +# This is the maximal version of nf-core/tools that does not require `nextflow inspect` for downloads +NFCORE_VER_LAST_WITHOUT_NF_INSPECT = (3, 3, 2) + + +# Pretty print a Nextflow version tuple +def pretty_nf_version(version: tuple[int, int, int, bool]) -> str: + return f"{version[0]}.{version[1]:02}.{version[2]}" + ("-edge" if version[3] else "") + + +def get_nf_version() -> Optional[tuple[int, int, int, bool]]: + """Get the version of Nextflow installed on the system.""" + try: + cmd_out = run_cmd("nextflow", "-v") + if cmd_out is None: + raise RuntimeError("Failed to run Nextflow version check.") + out, _ = cmd_out + out_str = str(out, encoding="utf-8") # Ensure we have a string + + version_str = out_str.strip().split()[-1] + + # Check if we are using an edge release + is_edge = False + edge_split = version_str.split("-") + if len(edge_split) > 1: + is_edge = True + version_str = edge_split[0] + + split_version_str = version_str.split(".") + parsed_version_tuple = ( + int(split_version_str[0]), + int(split_version_str[1]), + int(split_version_str[2]), + is_edge, + ) + return parsed_version_tuple + except Exception as e: + log.warning(f"Error getting Nextflow version: {e}") + return None + + +# Check that the Nextflow version >= the minimal version required +# This is used to ensure that we can run `nextflow inspect` +def check_nextflow_version(minimal_nf_version: tuple[int, int, int, bool], silent=False) -> bool: + """Check the version of Nextflow installed on the system. + + Args: + minimal_nf_version (tuple[int, int, int, bool]): The minimal version of Nextflow required. + silent (bool): Whether to log the version or not. + Returns: + bool: True if the installed version is greater than or equal to `minimal_nf_version` + """ + nf_version = get_nf_version() + if nf_version is None: + return False + + parsed_version_str = pretty_nf_version(nf_version) + + if silent: + log.debug(f"Detected Nextflow version {parsed_version_str}") + else: + log.info(f"Detected Nextflow version {parsed_version_str}") + + return nf_version >= minimal_nf_version + + def fetch_wf_config(wf_path: Path, cache_config: bool = True) -> dict: """Uses Nextflow to retrieve the the configuration variables from a Nextflow workflow. 
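A minimal usage sketch of how these version helpers are expected to combine in the refactored download command (illustrative only: the actual call sites live in the new `nf_core/pipelines/download/` code, and the exact `nextflow -v` output string shown in the comments is an assumption):

    from nf_core.pipelines.download.utils import DownloadError
    from nf_core.utils import (
        NF_INSPECT_MIN_NF_VERSION,
        check_nextflow_version,
        get_nf_version,
        pretty_nf_version,
    )

    # get_nf_version() shells out to `nextflow -v` and parses output such as
    # "nextflow version 25.04.4.5954" into (25, 4, 4, False); "-edge" releases set the last element to True.
    nf_version = get_nf_version()
    if nf_version is not None:
        print(f"Found Nextflow {pretty_nf_version(nf_version)}")  # pretty_nf_version((25, 4, 4, False)) -> "25.04.4"

    # Container discovery via `nextflow inspect` can then be gated on the minimum supported version:
    if not check_nextflow_version(NF_INSPECT_MIN_NF_VERSION, silent=True):
        raise DownloadError(
            f"Nextflow >= {pretty_nf_version(NF_INSPECT_MIN_NF_VERSION)} is required to resolve containers with `nextflow inspect`"
        )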
diff --git a/tests/data/mock_config_containers/nextflow.config b/tests/data/mock_config_containers/nextflow.config deleted file mode 100644 index 4195a3e68d..0000000000 --- a/tests/data/mock_config_containers/nextflow.config +++ /dev/null @@ -1,37 +0,0 @@ - - -// example from methylseq 1.0 -params.container = 'nfcore/methylseq:1.0' - -// example from methylseq 1.4 [Mercury Rattlesnake] -process.container = 'nfcore/methylseq:1.4' - -process { - - // example from Sarek 2.5 - - withName:Snpeff { - container = {(params.annotation_cache && params.snpEff_cache) ? 'nfcore/sarek:dev' : "nfcore/sareksnpeff:dev.${params.genome}"} - errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} - } - withLabel:VEP { - container = {(params.annotation_cache && params.vep_cache) ? 'nfcore/sarek:dev' : "nfcore/sarekvep:dev.${params.genome}"} - errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} - } - - // example from differentialabundance 1.2.0 - - withName: RMARKDOWNNOTEBOOK { - conda = "bioconda::r-shinyngs=1.7.1" - container = { "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1':'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1' }" } - } - - // example from nf-core/pairgenomealign 2.2.0 - - withName:'ALIGNMENT_.*' { - container = { "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/06/06beccfa4d48e5daf30dd8cee4f7e06fd51594963db0d5087ab695365b79903b/data' : - 'community.wave.seqera.io/library/last_samtools_open-fonts:176a6ab0c8171057'}" } - } - -} diff --git a/tests/data/mock_module_containers/modules/mock_docker_single_quay_io.nf b/tests/data/mock_module_containers/modules/mock_docker_single_quay_io.nf deleted file mode 100644 index fad0e26e3d..0000000000 --- a/tests/data/mock_module_containers/modules/mock_docker_single_quay_io.nf +++ /dev/null @@ -1,8 +0,0 @@ -process MOCK { - label 'process_fake' - - conda (params.enable_conda ? "bioconda::singlequay=1.9" : null) - container "quay.io/biocontainers/singlequay:1.9--pyh9f0ad1d_0" - - // truncated -} diff --git a/tests/data/mock_module_containers/modules/mock_dsl2_apptainer_var1.nf b/tests/data/mock_module_containers/modules/mock_dsl2_apptainer_var1.nf deleted file mode 100644 index c92f69b42c..0000000000 --- a/tests/data/mock_module_containers/modules/mock_dsl2_apptainer_var1.nf +++ /dev/null @@ -1,10 +0,0 @@ -process MOCK { - label 'process_fake' - - conda "bioconda::dsltwoapptainervarone=1.1.0" - container "${ (workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer') && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/dsltwoapptainervarone:1.1.0--py38h7be5676_2': - 'biocontainers/dsltwoapptainervarone:1.1.0--py38h7be5676_2' }" - - // truncated -} diff --git a/tests/data/mock_module_containers/modules/mock_dsl2_apptainer_var2.nf b/tests/data/mock_module_containers/modules/mock_dsl2_apptainer_var2.nf deleted file mode 100644 index 412c73d285..0000000000 --- a/tests/data/mock_module_containers/modules/mock_dsl2_apptainer_var2.nf +++ /dev/null @@ -1,10 +0,0 @@ -process MOCK { - label 'process_fake' - - conda "bioconda::dsltwoapptainervartwo=1.1.0" - container "${ ['singularity', 'apptainer'].contains(workflow.containerEngine) && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/dsltwoapptainervartwo:1.1.0--hdfd78af_0': - 'biocontainers/dsltwoapptainervartwo:1.1.0--hdfd78af_0' }" - - // truncated -} diff --git a/tests/data/mock_module_containers/modules/mock_dsl2_current.nf b/tests/data/mock_module_containers/modules/mock_dsl2_current.nf deleted file mode 100644 index 65cd8086ac..0000000000 --- a/tests/data/mock_module_containers/modules/mock_dsl2_current.nf +++ /dev/null @@ -1,10 +0,0 @@ -process MOCK { - label 'process_fake' - - conda "bioconda::dsltwocurrent=1.2.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/dsltwocurrent:1.2.1--pyhdfd78af_0': - 'biocontainers/dsltwocurrent:1.2.1--pyhdfd78af_0' }" - - // truncated -} diff --git a/tests/data/mock_module_containers/modules/mock_dsl2_current_inverted.nf b/tests/data/mock_module_containers/modules/mock_dsl2_current_inverted.nf deleted file mode 100644 index d5a369c742..0000000000 --- a/tests/data/mock_module_containers/modules/mock_dsl2_current_inverted.nf +++ /dev/null @@ -1,10 +0,0 @@ -process MOCK { - label 'process_fake' - - conda "bioconda::dsltwocurrentinv=3.3.2" - container "${ !workflow.containerEngine == 'singularity' && task.ext.singularity_pull_docker_container ? - 'biocontainers/dsltwocurrentinv:3.3.2--h1b792b2_1' : - 'https://depot.galaxyproject.org/singularity/dsltwocurrentinv:3.3.2--h1b792b2_1' }" - - // truncated -} diff --git a/tests/data/mock_module_containers/modules/mock_dsl2_variable.nf b/tests/data/mock_module_containers/modules/mock_dsl2_variable.nf deleted file mode 100644 index 561254069a..0000000000 --- a/tests/data/mock_module_containers/modules/mock_dsl2_variable.nf +++ /dev/null @@ -1,19 +0,0 @@ -process STAR_ALIGN { - // from rnaseq 3.7 - label 'process_fake' - - conda (params.enable_conda ? conda_str : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - "https://depot.galaxyproject.org/singularity/${container_id}" : - "quay.io/biocontainers/${container_id}" }" - - - // Note: 2.7X indices incompatible with AWS iGenomes so use older STAR version - conda_str = "bioconda::star=2.7.10a bioconda::samtools=1.15.1 conda-forge::gawk=5.1.0" - container_id = 'mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:afaaa4c6f5b308b4b6aa2dd8e99e1466b2a6b0cd-0' - if (is_aws_igenome) { - conda_str = "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" - container_id = 'mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' - } - -} diff --git a/tests/data/mock_module_containers/modules/mock_seqera_container_http.nf b/tests/data/mock_module_containers/modules/mock_seqera_container_http.nf deleted file mode 100644 index 20c7075481..0000000000 --- a/tests/data/mock_module_containers/modules/mock_seqera_container_http.nf +++ /dev/null @@ -1,11 +0,0 @@ -process CAT_FASTQ { - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data' : - 'community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264' }" - - // truncated - -} diff --git a/tests/data/mock_module_containers/modules/mock_seqera_container_oras.nf b/tests/data/mock_module_containers/modules/mock_seqera_container_oras.nf deleted file mode 100644 index 8278ac7917..0000000000 --- a/tests/data/mock_module_containers/modules/mock_seqera_container_oras.nf +++ /dev/null @@ -1,11 +0,0 @@ -process UMI_TRANSFER { - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6' : - 'community.wave.seqera.io/library/umi-transfer:1.0.0--d30e8812ea280fa1' }" - - // truncated - -} diff --git a/tests/data/mock_module_containers/modules/mock_seqera_container_oras_mulled.nf b/tests/data/mock_module_containers/modules/mock_seqera_container_oras_mulled.nf deleted file mode 100644 index 234ca04a45..0000000000 --- a/tests/data/mock_module_containers/modules/mock_seqera_container_oras_mulled.nf +++ /dev/null @@ -1,11 +0,0 @@ -process UMI_TRANSFER_MULLED { - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://community.wave.seqera.io/library/umi-transfer_umicollapse:796a995ff53da9e3' : - 'community.wave.seqera.io/library/umi-transfer_umicollapse:3298d4f1b49e33bd' }" - - // truncated - -} diff --git a/tests/data/mock_pipeline_containers/.nf-core.yml b/tests/data/mock_pipeline_containers/.nf-core.yml new file mode 100644 index 0000000000..3805dc81c1 --- /dev/null +++ b/tests/data/mock_pipeline_containers/.nf-core.yml @@ -0,0 +1 @@ +repository_type: pipeline diff --git a/tests/data/mock_pipeline_containers/conf/base.config b/tests/data/mock_pipeline_containers/conf/base.config new file mode 100644 index 0000000000..07aff59b98 --- /dev/null +++ b/tests/data/mock_pipeline_containers/conf/base.config @@ -0,0 +1,66 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/mock-pipeline Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- +*/ + +process { + + // TODO nf-core: Check the defaults for all processes + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + + errorStrategy = { task.exitStatus in ((130..145) + 104 + 175) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + + // Process-specific resource requirements + // NOTE - Please try and reuse the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // TODO nf-core: Customise requirements for specific processes. 
+ // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + } + withLabel:process_low { + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } + } + withLabel:process_medium { + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } + } + withLabel:process_high { + cpus = { 12 * task.attempt } + memory = { 72.GB * task.attempt } + time = { 16.h * task.attempt } + } + withLabel:process_long { + time = { 20.h * task.attempt } + } + withLabel:process_high_memory { + memory = { 200.GB * task.attempt } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } + withLabel: process_gpu { + ext.use_gpu = { workflow.profile.contains('gpu') } + accelerator = { workflow.profile.contains('gpu') ? 1 : null } + } +} diff --git a/tests/data/mock_pipeline_containers/conf/modules.config b/tests/data/mock_pipeline_containers/conf/modules.config new file mode 100644 index 0000000000..d203d2b6e6 --- /dev/null +++ b/tests/data/mock_pipeline_containers/conf/modules.config @@ -0,0 +1,34 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + withName: FASTQC { + ext.args = '--quiet' + } + + withName: 'MULTIQC' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + +} diff --git a/tests/data/mock_pipeline_containers/conf/test.config b/tests/data/mock_pipeline_containers/conf/test.config new file mode 100755 index 0000000000..454e5d1d39 --- /dev/null +++ b/tests/data/mock_pipeline_containers/conf/test.config @@ -0,0 +1,44 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/rnaseq -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/626c8fab639062eade4b10747e919341cbf9b41a/samplesheet/v3.10/samplesheet_test.csv' + + // Genome references + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/626c8fab639062eade4b10747e919341cbf9b41a/reference/genome.fasta' + gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/626c8fab639062eade4b10747e919341cbf9b41a/reference/genes_with_empty_tid.gtf.gz' + gff = 'https://raw.githubusercontent.com/nf-core/test-datasets/626c8fab639062eade4b10747e919341cbf9b41a/reference/genes.gff.gz' + transcript_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/626c8fab639062eade4b10747e919341cbf9b41a/reference/transcriptome.fasta' + additional_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/626c8fab639062eade4b10747e919341cbf9b41a/reference/gfp.fa.gz' + + bbsplit_fasta_list = 'https://raw.githubusercontent.com/nf-core/test-datasets/626c8fab639062eade4b10747e919341cbf9b41a/reference/bbsplit_fasta_list.txt' + hisat2_index = 'https://raw.githubusercontent.com/nf-core/test-datasets/626c8fab639062eade4b10747e919341cbf9b41a/reference/hisat2.tar.gz' + salmon_index = 'https://raw.githubusercontent.com/nf-core/test-datasets/626c8fab639062eade4b10747e919341cbf9b41a/reference/salmon.tar.gz' + rsem_index = 'https://raw.githubusercontent.com/nf-core/test-datasets/626c8fab639062eade4b10747e919341cbf9b41a/reference/rsem.tar.gz' + + // Other parameters + skip_bbsplit = false + pseudo_aligner = 'salmon' + umitools_bc_pattern = 'NNNN' +} diff --git a/tests/data/mock_pipeline_containers/conf/test_full.config b/tests/data/mock_pipeline_containers/conf/test_full.config new file mode 100755 index 0000000000..320b8156f0 --- /dev/null +++ b/tests/data/mock_pipeline_containers/conf/test_full.config @@ -0,0 +1,21 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. 
+
+    Use as follows:
+        nextflow run nf-core/rnaseq -profile test_full,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name = 'Full test profile'
+    config_profile_description = 'Full test dataset to check pipeline function'
+
+    // Parameters for full-size test
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/626c8fab639062eade4b10747e919341cbf9b41a/samplesheet/v3.10/samplesheet_full.csv'
+    genome = 'GRCh37'
+    pseudo_aligner = 'salmon'
+}
diff --git a/tests/data/mock_pipeline_containers/main_passing_test.nf b/tests/data/mock_pipeline_containers/main_passing_test.nf
new file mode 100644
index 0000000000..79fa95b17d
--- /dev/null
+++ b/tests/data/mock_pipeline_containers/main_passing_test.nf
@@ -0,0 +1,77 @@
+#!/usr/bin/env nextflow
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    nf-core/mock-pipeline
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Entrypoint for a passing `nextflow inspect` test -- container directives should
+    be correctly captured by the `nextflow inspect` command.
+
+    For verification purposes, the expected container directives for each tested
+    profile are kept in the `per_profile_output` directory.
+
+----------------------------------------------------------------------------------------
+*/
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+include { PASSING } from './workflows/passing'
+include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_mock-pipeline_pipeline'
+include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_mock-pipeline_pipeline'
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    NAMED WORKFLOWS FOR PIPELINE
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+//
+// WORKFLOW: Run main analysis pipeline depending on type of input
+//
+workflow NFCORE_MOCK_PIPELINE {
+    take:
+    ch_mockery // channel: samplesheet read in from --input
+
+    main:
+    ch_mockery = PASSING(ch_mockery)
+
+    emit:
+    ch_mockery
+}
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    RUN MAIN WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+workflow {
+    //
+    // SUBWORKFLOW: Run initialisation tasks
+    //
+    PIPELINE_INITIALISATION(
+        params.version,
+        params.validate_params,
+        params.monochrome_logs,
+        args,
+        params.outdir,
+        params.input,
+    )
+
+    //
+    // WORKFLOW: Run main workflow
+    //
+    NFCORE_MOCK_PIPELINE(
+        PIPELINE_INITIALISATION.out.samplesheet
+    )
+    //
+    // SUBWORKFLOW: Run completion tasks
+    //
+    PIPELINE_COMPLETION(
+        params.outdir,
+        params.monochrome_logs,
+        NFCORE_MOCK_PIPELINE.out.ch_mockery,
+    )
+}
diff --git a/tests/data/mock_pipeline_containers/modules.json b/tests/data/mock_pipeline_containers/modules.json
new file mode 100644
index 0000000000..e5824fbb55
--- /dev/null
+++ b/tests/data/mock_pipeline_containers/modules.json
@@ -0,0 +1,17 @@
+{
+    "name": "nf-core/mock-pipeline",
+    "homePage": "https://github.com/nf-core/mock-pipeline",
+    "repos": {
+        "https://github.com/nf-core/modules.git": {
+            "modules": {
+                "nf-core": {
+
"rmarkdownnotebook": { + "branch": "master", + "git_sha": "ba9efe74bd993e9ce77441104e48120834d5886e", + "installed_by": ["modules"] + } + } + } + } + } +} diff --git a/tests/data/mock_pipeline_containers/modules/local/passing/mock_docker_single_quay_io/main.nf b/tests/data/mock_pipeline_containers/modules/local/passing/mock_docker_single_quay_io/main.nf new file mode 100644 index 0000000000..45f8bdd320 --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/local/passing/mock_docker_single_quay_io/main.nf @@ -0,0 +1,20 @@ +process MOCK_DOCKER_SINGLE_QUAY_IO { + label 'process_fake' + + conda params.enable_conda ? "bioconda::singlequay=1.9" : null + container "quay.io/biocontainers/singlequay:1.9--pyh9f0ad1d_0" + + input: + val mock_val + + output: + path "*mockery.md", emit: report + + when: + task.ext.when == null || task.ext.when + + script: + """ + touch mockery.md + """ +} diff --git a/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_apptainer_var1/main.nf b/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_apptainer_var1/main.nf new file mode 100644 index 0000000000..4accee9b71 --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_apptainer_var1/main.nf @@ -0,0 +1,22 @@ +process MOCK_DSL2_APPTAINER_VAR1 { + label 'process_fake' + + conda "bioconda::dsltwoapptainervarone=1.1.0" + container "${(workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer') && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/dsltwoapptainervarone:1.1.0--py38h7be5676_2' + : 'biocontainers/dsltwoapptainervarone:1.1.0--py38h7be5676_2'}" + + input: + val mock_val + + output: + path "*mockery.md", emit: report + + when: + task.ext.when == null || task.ext.when + + script: + """ + touch mockery.md + """ +} diff --git a/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_apptainer_var2/main.nf b/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_apptainer_var2/main.nf new file mode 100644 index 0000000000..c2c21cc15c --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_apptainer_var2/main.nf @@ -0,0 +1,22 @@ +process MOCK_DSL2_APPTAINER_VAR2 { + label 'process_fake' + + conda "bioconda::dsltwoapptainervartwo=1.1.0" + container "${['singularity', 'apptainer'].contains(workflow.containerEngine) && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/dsltwoapptainervartwo:1.1.0--hdfd78af_0' + : 'biocontainers/dsltwoapptainervartwo:1.1.0--hdfd78af_0'}" + + input: + val mock_val + + output: + path "*mockery.md", emit: report + + when: + task.ext.when == null || task.ext.when + + script: + """ + touch mockery.md + """ +} diff --git a/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_current/main.nf b/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_current/main.nf new file mode 100644 index 0000000000..7b9d7f1ed5 --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_current/main.nf @@ -0,0 +1,22 @@ +process MOCK_DSL2_CURRENT { + label 'process_fake' + + conda "bioconda::dsltwocurrent=1.2.1" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/dsltwocurrent:1.2.1--pyhdfd78af_0' + : 'biocontainers/dsltwocurrent:1.2.1--pyhdfd78af_0'}" + + input: + val mock_val + + output: + path "*mockery.md", emit: report + + when: + task.ext.when == null || task.ext.when + + script: + """ + touch mockery.md + """ +} diff --git a/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_current_inverted/main.nf b/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_current_inverted/main.nf new file mode 100644 index 0000000000..c5e63c8cb5 --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_current_inverted/main.nf @@ -0,0 +1,22 @@ +process MOCK_DSL2_CURRENT_INVERTED { + label 'process_fake' + + conda "bioconda::dsltwocurrentinv=3.3.2" + container "${workflow.containerEngine != 'singularity' || task.ext.singularity_pull_docker_container + ? 'biocontainers/dsltwocurrentinv:3.3.2--h1b792b2_1' + : 'https://depot.galaxyproject.org/singularity/dsltwocurrentinv:3.3.2--h1b792b2_1'}" + + input: + val mock_val + + output: + path "*mockery.md", emit: report + + when: + task.ext.when == null || task.ext.when + + script: + """ + touch mockery.md + """ +} diff --git a/tests/data/mock_module_containers/modules/mock_dsl2_old.nf b/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_old/main.nf similarity index 63% rename from tests/data/mock_module_containers/modules/mock_dsl2_old.nf rename to tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_old/main.nf index 11eace3b1c..db08a99b8c 100644 --- a/tests/data/mock_module_containers/modules/mock_dsl2_old.nf +++ b/tests/data/mock_pipeline_containers/modules/local/passing/mock_dsl2_old/main.nf @@ -1,4 +1,4 @@ -process MOCK { +process MOCK_DSL2_OLD { label 'process_fake' conda (params.enable_conda ? "bioconda::dsltwoold=0.23.0" : null) @@ -8,5 +8,18 @@ process MOCK { container "quay.io/biocontainers/dsltwoold:0.23.0--0" } - // truncated + input: + val mock_val + + output: + path "*mockery.md", emit: report + + when: + task.ext.when == null || task.ext.when + + script: + """ + touch mockery.md + """ + } diff --git a/tests/data/mock_pipeline_containers/modules/local/passing/mock_no_container/main.nf b/tests/data/mock_pipeline_containers/modules/local/passing/mock_no_container/main.nf new file mode 100644 index 0000000000..54da6ea66b --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/local/passing/mock_no_container/main.nf @@ -0,0 +1,17 @@ +process MOCK_NO_CONTAINER { + label 'process_fake' + + input: + val mock_val + + output: + path "*mockery.md", emit: report + + when: + task.ext.when == null || task.ext.when + + script: + """ + touch mockery.md + """ +} diff --git a/tests/data/mock_pipeline_containers/modules/local/passing/mock_seqera_container_http/main.nf b/tests/data/mock_pipeline_containers/modules/local/passing/mock_seqera_container_http/main.nf new file mode 100644 index 0000000000..55e3b64748 --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/local/passing/mock_seqera_container_http/main.nf @@ -0,0 +1,22 @@ +process MOCK_SEQERA_CONTAINER_HTTP { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data' + : 'community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264'}" + + input: + val mock_val + + output: + path "*mockery.md", emit: report + + when: + task.ext.when == null || task.ext.when + + script: + """ + touch mockery.md + """ +} diff --git a/tests/data/mock_pipeline_containers/modules/local/passing/mock_seqera_container_oras/main.nf b/tests/data/mock_pipeline_containers/modules/local/passing/mock_seqera_container_oras/main.nf new file mode 100644 index 0000000000..e81f7cf164 --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/local/passing/mock_seqera_container_oras/main.nf @@ -0,0 +1,22 @@ +process MOCK_SEQERA_CONTAINER_ORAS { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6' + : 'community.wave.seqera.io/library/umi-transfer:1.0.0--d30e8812ea280fa1'}" + + input: + val mock_val + + output: + path "*mockery.md", emit: report + + when: + task.ext.when == null || task.ext.when + + script: + """ + touch mockery.md + """ +} diff --git a/tests/data/mock_pipeline_containers/modules/local/passing/mock_seqera_container_oras_mulled/main.nf b/tests/data/mock_pipeline_containers/modules/local/passing/mock_seqera_container_oras_mulled/main.nf new file mode 100644 index 0000000000..abbfa91201 --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/local/passing/mock_seqera_container_oras_mulled/main.nf @@ -0,0 +1,22 @@ +process MOCK_SEQERA_CONTAINER_ORAS_MULLED { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'oras://community.wave.seqera.io/library/umi-transfer_umicollapse:796a995ff53da9e3' + : 'community.wave.seqera.io/library/umi-transfer_umicollapse:3298d4f1b49e33bd'}" + + input: + val mock_val + + output: + path "*mockery.md", emit: report + + when: + task.ext.when == null || task.ext.when + + script: + """ + touch mockery.md + """ +} diff --git a/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/environment.yml b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/environment.yml new file mode 100644 index 0000000000..21fc637f7a --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::r-base=4.1.0 + - conda-forge::r-rmarkdown=2.9 + - conda-forge::r-yaml=2.2.1 diff --git a/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/main.nf b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/main.nf new file mode 100644 index 0000000000..43eac5bf30 --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/main.nf @@ -0,0 +1,147 @@ +include { dump_params_yml; indent_code_block } from "./parametrize" + +process RMARKDOWNNOTEBOOK { + tag "$meta.id" + label 'process_low' + + //NB: You likely want to override this with a container containing all required + //dependencies for your analysis. 
The container at least needs to contain the + //yaml and rmarkdown R packages. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-31ad840d814d356e5f98030a4ee308a16db64ec5:0e852a1e4063fdcbe3f254ac2c7469747a60e361-0' : + 'biocontainers/mulled-v2-31ad840d814d356e5f98030a4ee308a16db64ec5:0e852a1e4063fdcbe3f254ac2c7469747a60e361-0' }" + + input: + tuple val(meta), path(notebook) + val parameters + path input_files + + output: + tuple val(meta), path("*.html") , emit: report + tuple val(meta), path("*.parameterised.Rmd") , emit: parameterised_notebook, optional: true + tuple val(meta), path("artifacts/*") , emit: artifacts, optional: true + tuple val(meta), path("session_info.log") , emit: session_info + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def parametrize = (task.ext.parametrize == null) ? true : task.ext.parametrize + def implicit_params = (task.ext.implicit_params == null) ? true : task.ext.implicit_params + def meta_params = (task.ext.meta_params == null) ? true : task.ext.meta_params + + // Dump parameters to yaml file. + // Using a yaml file over using the CLI params because + // * no issue with escaping + // * allows to pass nested maps instead of just single values + def params_cmd = "" + def render_cmd = "" + if (parametrize) { + nb_params = [:] + if (implicit_params) { + nb_params["cpus"] = task.cpus + nb_params["artifact_dir"] = "artifacts" + nb_params["input_dir"] = "./" + } + if (meta_params) { + nb_params["meta"] = meta + } + nb_params += parameters + params_cmd = dump_params_yml(nb_params) + render_cmd = """\ + params = yaml::read_yaml('.params.yml') + + # Instead of rendering with params, produce a version of the R + # markdown with param definitions set, so the notebook itself can + # be reused + rmd_content <- readLines('${prefix}.Rmd') + + # Extract YAML content between the first two '---' + start_idx <- which(rmd_content == "---")[1] + end_idx <- which(rmd_content == "---")[2] + rmd_yaml_content <- paste(rmd_content[(start_idx+1):(end_idx-1)], collapse = "\\n") + rmd_params <- yaml::yaml.load(rmd_yaml_content) + + # Override the params + rmd_params[['params']] <- modifyList(rmd_params[['params']], params) + + # Recursive function to add 'value' to list elements, except for top-level + add_value_recursively <- function(lst, is_top_level = FALSE) { + if (!is.list(lst)) { + return(lst) + } + + lst <- lapply(lst, add_value_recursively) + if (!is_top_level) { + lst <- list(value = lst) + } + return(lst) + } + + # Reformat nested lists under 'params' to have a 'value' key recursively + rmd_params[['params']] <- add_value_recursively(rmd_params[['params']], is_top_level = TRUE) + + # Convert back to YAML string + updated_yaml_content <- as.character(yaml::as.yaml(rmd_params)) + + # Remove the old YAML content + rmd_content <- rmd_content[-((start_idx+1):(end_idx-1))] + + # Insert the updated YAML content at the right position + rmd_content <- append(rmd_content, values = unlist(strsplit(updated_yaml_content, split = "\\n")), after = start_idx) + + writeLines(rmd_content, '${prefix}.parameterised.Rmd') + + # Render based on the updated file + rmarkdown::render('${prefix}.parameterised.Rmd', output_file='${prefix}.html', envir = new.env()) + """ + } else { + render_cmd = 
"rmarkdown::render('${prefix}.Rmd', output_file='${prefix}.html')" + } + + """ + # Dump .params.yml heredoc (section will be empty if parametrization is disabled) + ${indent_code_block(params_cmd, 4)} + + # Create output directory + mkdir artifacts + + # Set parallelism for BLAS/MKL etc. to avoid over-booking of resources + export MKL_NUM_THREADS="$task.cpus" + export OPENBLAS_NUM_THREADS="$task.cpus" + export OMP_NUM_THREADS="$task.cpus" + + # Work around https://github.com/rstudio/rmarkdown/issues/1508 + # If the symbolic link is not replaced by a physical file + # output- and temporary files will be written to the original directory. + mv "${notebook}" "${notebook}.orig" + cp -L "${notebook}.orig" "${prefix}.Rmd" + + # Render notebook + Rscript - < versions.yml + "${task.process}": + rmarkdown: \$(Rscript -e "cat(paste(packageVersion('rmarkdown'), collapse='.'))") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch session_info.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rmarkdown: \$(Rscript -e "cat(paste(packageVersion('rmarkdown'), collapse='.'))") + END_VERSIONS + """ +} diff --git a/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/meta.yml b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/meta.yml new file mode 100644 index 0000000000..eeb0eb362c --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/meta.yml @@ -0,0 +1,99 @@ +name: rmarkdownnotebook +description: Render an rmarkdown notebook. Supports parametrization. +keywords: + - R + - notebook + - reports +tools: + - rmarkdown: + description: Dynamic Documents for R + homepage: https://rmarkdown.rstudio.com/ + documentation: https://rmarkdown.rstudio.com/lesson-1.html + tool_dev_url: https://github.com/rstudio/rmarkdown + licence: ["GPL-3"] + identifier: "" +params: + - parametrize: + type: boolean + description: If true, parametrize the notebook + - implicit_params: + type: boolean + description: | + If true (default), include the implicit params + * `input_dir`, which points to the directory containing the files added via `input_files`, + * `artifact_dir`, which points to the directory where the notebook should place output files, and + * `cpus`, which contains the value of ${task.cpus} + - meta_params: + type: boolean + description: | + If true, include a parameter `meta` which contains the information specified + via the `meta` input channel. +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - notebook: + type: file + description: Rmarkdown file + pattern: "*.{Rmd}" + - - parameters: + type: map + description: | + Groovy map with notebook parameters which will be passed to + rmarkdown to generate parametrized reports. + - - input_files: + type: file + description: One or multiple files serving as input data for the notebook. + pattern: "*" +output: + - report: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: HTML report generated from Rmarkdown + pattern: "*.html" + - parameterised_notebook: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.parameterised.Rmd": + type: file + description: Parameterised Rmarkdown file + pattern: "*.parameterised.Rmd" + - artifacts: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - artifacts/*: + type: file + description: Artifacts generated by the notebook + pattern: "artifacts/*" + - session_info: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - session_info.log: + type: file + description: dump of R SessionInfo + pattern: "*.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@grst" +maintainers: + - "@grst" diff --git a/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/parametrize.nf b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/parametrize.nf new file mode 100644 index 0000000000..05e259ebcf --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/parametrize.nf @@ -0,0 +1,36 @@ +import org.yaml.snakeyaml.Yaml +import org.yaml.snakeyaml.DumperOptions + + +/** + * Multiline code blocks need to have the same indentation level + * as the `script:` section. This function re-indents code to the specified level. + */ +def indent_code_block(code, n_spaces) { + def indent_str = " ".multiply(n_spaces) + return code.stripIndent().split("\n").join("\n" + indent_str) +} + +/** + * Create a config YAML file from a groovy map + * + * @params task The process' `task` variable + * @returns a line to be inserted in the bash script. + */ +def dump_params_yml(params) { + DumperOptions options = new DumperOptions(); + options.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK); + def yaml = new Yaml(options) + def yaml_str = yaml.dump(params) + + // Writing the .params.yml file directly as follows does not work. 
+ // It only works in 'exec:', but not if there is a `script:` section: + // task.workDir.resolve('.params.yml').text = yaml_str + + // Therefore, we inject it into the bash script: + return """\ + cat <<"END_PARAMS_SECTION" > ./.params.yml + ${indent_code_block(yaml_str, 8)} + END_PARAMS_SECTION + """ +} diff --git a/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/tests/main.nf.test b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/tests/main.nf.test new file mode 100644 index 0000000000..2657be7dfa --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/tests/main.nf.test @@ -0,0 +1,100 @@ +nextflow_process { + + name "Test Process RMARKDOWNNOTEBOOK" + script "../main.nf" + process "RMARKDOWNNOTEBOOK" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "rmarkdownnotebook" + + test("test_rmarkdownnotebook") { + + when { + params{ + module_args = false + } + process { + """ + input[0] = [ [ id:'test_rmd' ], file(params.test_data['generic']['notebooks']['rmarkdown'], checkIfExists: true) ] + input[1] = [:] + input[2] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.report[0][1]).name, + process.out.parameterised_notebook, + process.out.artifacts, + file(process.out.session_info[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("test_rmarkdownnotebook_parametrize") { + + when { + params{ + module_args = true + } + process { + """ + input[0] = [ [ id:'test_rmd' ], file(params.test_data['generic']['notebooks']['rmarkdown'], checkIfExists: true) ] + input[1] = [input_filename: "hello.txt", n_iter: 12] + input[2] = file(params.test_data['generic']['txt']['hello'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.report[0][1]).name, + process.out.parameterised_notebook, + process.out.artifacts, + file(process.out.session_info[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("test_rmarkdownnotebook - stub") { + + options "-stub" + + when { + params{ + module_args = false + } + process { + """ + input[0] = [ [ id:'test_rmd' ], file(params.test_data['generic']['notebooks']['rmarkdown'], checkIfExists: true) ] + input[1] = [:] + input[2] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.report[0][1]).name, + process.out.parameterised_notebook, + process.out.artifacts, + file(process.out.session_info[0][1]).name, + process.out.versions + ).match() } + ) + } + } + +} diff --git a/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/tests/main.nf.test.snap b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/tests/main.nf.test.snap new file mode 100644 index 0000000000..ad932e1ab9 --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "test_rmarkdownnotebook": { + "content": [ + "test_rmd.html", + [ + + ], + [ + + ], + "session_info.log", + [ + "versions.yml:md5,d28a4d9ee45d7823aa58dbbda1a5b930" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-17T12:51:28.269874594" + }, + "test_rmarkdownnotebook_parametrize": { + "content": [ + "test_rmd.html", + [ + [ + { + "id": "test_rmd" + }, + "test_rmd.parameterised.Rmd:md5,59e184e50aadb66a821a2acce6a7c27c" + ] + ], + [ + [ + { + "id": "test_rmd" + }, + 
"artifact.txt:md5,b10a8db164e0754105b7a99be72e3fe5" + ] + ], + "session_info.log", + [ + "versions.yml:md5,d28a4d9ee45d7823aa58dbbda1a5b930" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-17T12:51:36.802094082" + }, + "test_rmarkdownnotebook - stub": { + "content": [ + "test_rmd.html", + [ + + ], + [ + + ], + "session_info.log", + [ + "versions.yml:md5,d28a4d9ee45d7823aa58dbbda1a5b930" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-17T12:51:43.527775598" + } +} \ No newline at end of file diff --git a/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/tests/nextflow.config b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/tests/nextflow.config new file mode 100644 index 0000000000..9f6cb6e2bc --- /dev/null +++ b/tests/data/mock_pipeline_containers/modules/nf-core/rmarkdownnotebook/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.parametrize = params.module_args +} diff --git a/tests/data/mock_pipeline_containers/nextflow.config b/tests/data/mock_pipeline_containers/nextflow.config new file mode 100644 index 0000000000..a031baae31 --- /dev/null +++ b/tests/data/mock_pipeline_containers/nextflow.config @@ -0,0 +1,243 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/mock-pipeline Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ + +// Global default params, used in configs +params { + + // TODO nf-core: Specify your pipeline's command line flags + // Input options + input = null + + // Boilerplate options + outdir = null + version = false + trace_report_suffix = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + + // Set up params for testing containers defined in config + annotation_cache = true + snpEff_cache = false + snpEff_cache = true +} + +// example from methylseq 1.0 +params.container = 'nfcore/methylseq:1.0' + +// example from methylseq 1.4 [Mercury Rattlesnake] +process.container = 'nfcore/methylseq:1.4' + +process { + + // example from Sarek 2.5 + + withName: Snpeff { + container = { params.annotation_cache && params.snpEff_cache ? 'nfcore/sarek:dev' : "nfcore/sareksnpeff:dev.${params.genome}" } + errorStrategy = { task.exitStatus == 143 ? 'retry' : 'ignore' } + } + withLabel: VEP { + container = { params.annotation_cache && params.vep_cache ? 'nfcore/sarek:dev' : "nfcore/sarekvep:dev.${params.genome}" } + errorStrategy = { task.exitStatus == 143 ? 'retry' : 'ignore' } + } + + // example from differentialabundance 1.2.0 + + withName: RMARKDOWNNOTEBOOK { + conda = "bioconda::r-shinyngs=1.7.1" + container = { "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1' : 'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1'}" } + } +} + +process { + // TODO nf-core: Check the defaults for all processes + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + + // 175 signals that the Pipeline had an unrecoverable error while + // restoring a Snapshot via Fusion Snapshots. + errorStrategy = { task.exitStatus in ((130..145) + 104 + 175) ? 
'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' +} + +profiles { + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + docker { + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + podman { + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + shifter { + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + charliecloud { + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + gpu { + docker.runOptions = '-u $(id -u):$(id -g) --gpus all' + apptainer.runOptions = '--nv' + singularity.runOptions = '--nv' + } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } +} + + + +// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +charliecloud.registry = 'quay.io' + + + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
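+// PYTHONNOUSERSITE (any non-empty value) stops Python from adding the user site-packages directory
+// to sys.path, and R_PROFILE_USER / R_ENVIRON_USER point R at files in the container root rather than
+// the user's home directory, so only the libraries shipped inside the container are picked up.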
+ +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" +} + +// Set bash options +process.shell = [ + "bash", + "-C", + "-e", + "-u", + "-o", + "pipefail", +] + +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + +timeline { + enabled = true + file = "${params.outdir}/pipeline_info/execution_timeline_${params.trace_report_suffix}.html" +} +report { + enabled = true + file = "${params.outdir}/pipeline_info/execution_report_${params.trace_report_suffix}.html" +} +trace { + enabled = true + file = "${params.outdir}/pipeline_info/execution_trace_${params.trace_report_suffix}.txt" +} +dag { + enabled = true + file = "${params.outdir}/pipeline_info/pipeline_dag_${params.trace_report_suffix}.html" +} + +manifest { + name = 'nf-core/mock-pipeline' + contributors = [ + [ + name: 'fds', + affiliation: '', + email: '', + github: '', + contribution: [], + orcid: '', + ] + ] + homePage = 'https://github.com/nf-core/mock-pipeline' + description = """dfsfjl""" + mainScript = 'main.nf' + defaultBranch = 'master' + nextflowVersion = '!>=25.04.4' + version = '1.0.0dev' + doi = '' +} diff --git a/tests/data/mock_pipeline_containers/per_profile_output/docker_containers.json b/tests/data/mock_pipeline_containers/per_profile_output/docker_containers.json new file mode 100644 index 0000000000..64ca108646 --- /dev/null +++ b/tests/data/mock_pipeline_containers/per_profile_output/docker_containers.json @@ -0,0 +1,11 @@ +{ + "MOCK_DOCKER_SINGLE_QUAY_IO": "quay.io/biocontainers/singlequay:1.9--pyh9f0ad1d_0", + "MOCK_DSL2_APPTAINER_VAR1": "quay.io/biocontainers/dsltwoapptainervarone:1.1.0--py38h7be5676_2", + "MOCK_DSL2_APPTAINER_VAR2": "quay.io/biocontainers/dsltwoapptainervartwo:1.1.0--hdfd78af_0", + "MOCK_DSL2_CURRENT": "quay.io/biocontainers/dsltwocurrent:1.2.1--pyhdfd78af_0", + "MOCK_DSL2_CURRENT_INVERTED": "quay.io/biocontainers/dsltwocurrentinv:3.3.2--h1b792b2_1", + "MOCK_DSL2_OLD": "quay.io/biocontainers/dsltwoold:0.23.0--0", + "MOCK_SEQERA_CONTAINER_HTTP": "community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264", + "MOCK_SEQERA_CONTAINER_ORAS": "community.wave.seqera.io/library/umi-transfer:1.0.0--d30e8812ea280fa1", + "MOCK_SEQERA_CONTAINER_ORAS_MULLED": "community.wave.seqera.io/library/umi-transfer_umicollapse:3298d4f1b49e33bd" +} diff --git a/tests/data/mock_pipeline_containers/per_profile_output/singularity_containers.json b/tests/data/mock_pipeline_containers/per_profile_output/singularity_containers.json new file mode 100644 index 0000000000..765ca9a8a4 --- /dev/null +++ b/tests/data/mock_pipeline_containers/per_profile_output/singularity_containers.json @@ -0,0 +1,11 @@ +{ + "MOCK_DOCKER_SINGLE_QUAY_IO": "quay.io/biocontainers/singlequay:1.9--pyh9f0ad1d_0", + "MOCK_DSL2_APPTAINER_VAR1": "https://depot.galaxyproject.org/singularity/dsltwoapptainervarone:1.1.0--py38h7be5676_2", + "MOCK_DSL2_APPTAINER_VAR2": "https://depot.galaxyproject.org/singularity/dsltwoapptainervartwo:1.1.0--hdfd78af_0", + "MOCK_DSL2_CURRENT": "https://depot.galaxyproject.org/singularity/dsltwocurrent:1.2.1--pyhdfd78af_0", + "MOCK_DSL2_CURRENT_INVERTED": "https://depot.galaxyproject.org/singularity/dsltwocurrentinv:3.3.2--h1b792b2_1", + "MOCK_DSL2_OLD": "https://depot.galaxyproject.org/singularity/dsltwoold:0.23.0--0", + "MOCK_SEQERA_CONTAINER_HTTP": 
"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data", + "MOCK_SEQERA_CONTAINER_ORAS": "oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6", + "MOCK_SEQERA_CONTAINER_ORAS_MULLED": "oras://community.wave.seqera.io/library/umi-transfer_umicollapse:796a995ff53da9e3" +} diff --git a/tests/data/mock_pipeline_containers/subworkflows/local/utils_nfcore_mock-pipeline_pipeline/main.nf b/tests/data/mock_pipeline_containers/subworkflows/local/utils_nfcore_mock-pipeline_pipeline/main.nf new file mode 100644 index 0000000000..c2f8a26bea --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/local/utils_nfcore_mock-pipeline_pipeline/main.nf @@ -0,0 +1,200 @@ +// +// Subworkflow with functionality specific to the nf-core/mock-pipeline pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW TO INITIALISE PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + + // + // Create channel from input file provided through params.input + // + + Channel + .fromPath(params.input) + .splitCsv(header: true, strip: true) + .map { row -> + [[id:row.sample], row.fastq_1, row.fastq_2] + } + .map { + meta, fastq_1, fastq_2 -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + } + } + .groupTuple() + .map { samplesheet -> + validateInputSamplesheet(samplesheet) + } + .map { + meta, fastqs -> + return [ meta, fastqs.flatten() ] + } + .set { ch_samplesheet } + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW FOR PIPELINE COMPLETION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_COMPLETION { + + take: + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + multiqc_report // string: Path to MultiQC report + + main: + 
summary_params = [:] + def multiqc_reports = multiqc_report.toList() + + // + // Completion email and summary + // + workflow.onComplete { + + completionSummary(monochrome_logs) + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "",
+    // Uncomment function in methodsDescriptionText to render in MultiQC report
+    def reference_text = [
+        "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
+        "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>"
+    ].join(' ').trim()
+
+    return reference_text
+}
+
+def methodsDescriptionText(mqc_methods_yaml) {
+    // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file
+    def meta = [:]
+    meta.workflow = workflow.toMap()
+    meta["manifest_map"] = workflow.manifest.toMap()
+
+    // Pipeline DOI
+    if (meta.manifest_map.doi) {
+        // Using a loop to handle multiple DOIs
+        // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
+        // Removing ` ` since the manifest.doi is a string and not a proper list
+        def temp_doi_ref = ""
+        def manifest_doi = meta.manifest_map.doi.tokenize(",")
+        manifest_doi.each { doi_ref ->
+            temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), "
+        }
+        meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2)
+    } else meta["doi_text"] = ""
+    meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 0000000000..d6e593e852 --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info("${workflow.manifest.name} ${getWorkflowVersion()}") + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) + + nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + def parser = new org.yaml.snakeyaml.Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } + catch (NullPointerException e) { + log.debug(e) + log.warn("Could not verify conda channel configuration.") + return null + } + catch (IOException e) { + log.debug(e) + log.warn("Could not verify conda channel configuration.") + return null + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = required_channels_in_order != channels.findAll { ch -> ch in required_channels_in_order } + + if (channels_missing | channel_priority_violation) { + log.warn """\ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + There is a problem with your Conda configuration! + You will need to set-up the conda-forge and bioconda channels correctly. + Please refer to https://bioconda.github.io/ + The observed channel order is + ${channels} + but the following channel order is required: + ${required_channels_in_order} + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + """.stripIndent(true) + } +} diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 0000000000..e5c3a0a828 --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. 
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000000..68718e4f59 --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000000..e3f0baf473 --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000000..02dbf094cd --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,113 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should 
print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + expect { + with(workflow) { + assert success + assert "nextflow_workflow v9.9.9" in stdout + } + } + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 0000000000..a09572e5bb --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 0000000000..f84761125a --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 0000000000..bfd258760d --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,419 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NFCORE_PIPELINE { + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + def valid_config = true as Boolean + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn( + "[${workflow.manifest.name}] You are attempting to run 
the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " + ) + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error( + "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } + if (nextflow_cli_args[0]) { + log.warn( + "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + ${workflow.manifest.name}: ${getWorkflowVersion()} + Nextflow: ${workflow.nextflow.version} + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + summary_params + .keySet() + .each { group -> + def group_params = summary_params.get(group) + // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    <p style=\"font-size:110%\"><b>${group}</b></p>\n"
+                summary_section += "    <dl class=\"dl-horizontal\">\n"
+                group_params
+                    .keySet()
+                    .sort()
+                    .each { param ->
+                        summary_section += "        <dt>${param}</dt><dd><samp>${group_params.get(param) ?: 'N/A'}</samp></dd>\n"
+                    }
+                summary_section += "    </dl>
    \n" + } + } + + def yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" as String + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + def colorcodes = [:] as Map + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? 
'' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// Return a single report from an object that may be a Path or List +// +def getSingleReport(multiqc_reports) { + if (multiqc_reports instanceof Path) { + return multiqc_reports + } else if (multiqc_reports instanceof List) { + if (multiqc_reports.size() == 0) { + log.warn("[${workflow.manifest.name}] No reports found from process 'MULTIQC'") + return null + } else if (multiqc_reports.size() == 1) { + return multiqc_reports.first() + } else { + log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") + return multiqc_reports.first() + } + } else { + return null + } +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[${workflow.manifest.name}] Successful: ${workflow.runName}" + if (!workflow.success) { + subject = "[${workflow.manifest.name}] FAILED: ${workflow.runName}" + } + + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) { + misc_fields['Pipeline repository Git URL'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['Pipeline repository Git Commit'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['Pipeline Git branch/tag'] = workflow.revision + } + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = getSingleReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as MemoryUnit + def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + def colors = logColours(monochrome_logs) as Map + if (email_address) { + try { + if (plaintext_email) { + new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') + } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + ['sendmail', '-t'].execute() << sendmail_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (sendmail)-") + } + catch (Exception msg) { + log.debug(msg.toString()) + log.debug("Trying with mail instead of sendmail") + // Catch failures and try with plaintext + def mail_cmd = ['mail', '-s', subject, '--content-type=text/html', email_address] + mail_cmd.execute() << email_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (mail)-") + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + nextflow.extension.FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html") + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + nextflow.extension.FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt") + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Pipeline completed successfully${colors.reset}-") + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-") + } + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.red} Pipeline completed with errors${colors.reset}-") + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) { + misc_fields['repository'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['commitid'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['revision'] = workflow.revision + } + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection() + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")) + def postRC = post.getResponseCode() + if (!postRC.equals(200)) { + log.warn(post.getErrorStream().getText()) + } +} diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 0000000000..d08d24342d --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000000..f117040cbd --- /dev/null +++ 
b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,126 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function getSingleReport with a single file") { + function "getSingleReport" + + when { + function { + """ + input[0] = file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert function.result.contains("test.tsv") } + ) + } + } + + test("Test Function getSingleReport with multiple files") { + function "getSingleReport" + + when { + function { + """ + input[0] = [ + file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true), + file(params.modules_testdata_base_path + '/generic/tsv/network.tsv', checkIfExists: true), + file(params.modules_testdata_base_path + '/generic/tsv/expression.tsv', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert function.result.contains("test.tsv") }, + { assert !function.result.contains("network.tsv") }, + { assert !function.result.contains("expression.tsv") } + ) + } + } +} diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000000..02c6701413 --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,136 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": 
"", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000000..8940d32d1e --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 0000000000..859d1030fb --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + 
"nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 0000000000..d0a926bf6d --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 0000000000..ac8523c9a2 --- /dev/null +++ b/tests/data/mock_pipeline_containers/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/tests/data/mock_pipeline_containers/workflows/passing.nf b/tests/data/mock_pipeline_containers/workflows/passing.nf new file mode 100644 index 0000000000..2fa13bfef3 --- /dev/null +++ b/tests/data/mock_pipeline_containers/workflows/passing.nf @@ -0,0 +1,42 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// Mock modules below +include { MOCK_DOCKER_SINGLE_QUAY_IO } from '../modules/local/passing/mock_docker_single_quay_io/main' +include { MOCK_DSL2_APPTAINER_VAR1 } from '../modules/local/passing/mock_dsl2_apptainer_var1/main' +include { MOCK_DSL2_APPTAINER_VAR2 } from '../modules/local/passing/mock_dsl2_apptainer_var2/main' +include { MOCK_DSL2_CURRENT } from '../modules/local/passing/mock_dsl2_current/main' +include { MOCK_DSL2_CURRENT_INVERTED } from '../modules/local/passing/mock_dsl2_current_inverted/main' +include { MOCK_DSL2_OLD } from '../modules/local/passing/mock_dsl2_old/main' +include { MOCK_SEQERA_CONTAINER_HTTP } from '../modules/local/passing/mock_seqera_container_http/main' +include { MOCK_SEQERA_CONTAINER_ORAS } from '../modules/local/passing/mock_seqera_container_oras/main' +include { MOCK_SEQERA_CONTAINER_ORAS_MULLED } from '../modules/local/passing/mock_seqera_container_oras_mulled/main' +// include { RMARKDOWNNOTEBOOK } from '../modules/nf-core/rmarkdownnotebook/main' + +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mock-pipeline_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PASSING { + take: + ch_mockery // channel: samplesheet read in from --input + + main: + ch_mockery = MOCK_DOCKER_SINGLE_QUAY_IO(ch_mockery) + ch_mockery = MOCK_DSL2_APPTAINER_VAR1(ch_mockery) + ch_mockery = MOCK_DSL2_APPTAINER_VAR2(ch_mockery) + ch_mockery = MOCK_DSL2_CURRENT(ch_mockery) + ch_mockery 
= MOCK_DSL2_CURRENT_INVERTED(ch_mockery) + ch_mockery = MOCK_DSL2_OLD(ch_mockery) + + emit: + ch_mockery +} diff --git a/tests/pipelines/test_download.py b/tests/pipelines/test_download.py index 84675c7ca0..2ee8a8f7d4 100644 --- a/tests/pipelines/test_download.py +++ b/tests/pipelines/test_download.py @@ -1,26 +1,503 @@ """Tests for the download subcommand of nf-core tools""" +import json import logging import os import re import shutil +import subprocess import tempfile import unittest +from contextlib import redirect_stderr +from io import StringIO from pathlib import Path from unittest import mock import pytest +import requests +import rich.progress_bar +import rich.table +import rich.text import nf_core.pipelines.create.create +import nf_core.pipelines.download import nf_core.pipelines.list import nf_core.utils -from nf_core.pipelines.download import ContainerError, DownloadWorkflow, WorkflowRepo +from nf_core.pipelines.download import DownloadWorkflow +from nf_core.pipelines.download.container_fetcher import ContainerProgress +from nf_core.pipelines.download.docker import ( + DockerError, + DockerFetcher, + DockerProgress, +) +from nf_core.pipelines.download.singularity import ( + FileDownloader, + SingularityError, + SingularityFetcher, + SingularityProgress, +) +from nf_core.pipelines.download.utils import ( + DownloadError, + intermediate_file, +) +from nf_core.pipelines.download.workflow_repo import WorkflowRepo from nf_core.synced_repo import SyncedRepo -from nf_core.utils import run_cmd +from nf_core.utils import ( + NF_INSPECT_MIN_NF_VERSION, + check_nextflow_version, +) from ..utils import TEST_DATA_DIR, with_temporary_folder +class DownloadUtilsTest(unittest.TestCase): + @pytest.fixture(autouse=True) + def use_caplog(self, caplog): + self._caplog = caplog + + # + # Test for 'utils.intermediate_file' + # + @with_temporary_folder + def test_intermediate_file(self, outdir): + outdir = Path(outdir) + # Code that doesn't fail. The file shall exist + + # Directly write to the file, as in download_image + output_path = outdir / "testfile1" + with intermediate_file(output_path) as tmp: + tmp_path = Path(tmp.name) + tmp.write(b"Hello, World!") + + assert output_path.exists() + assert os.path.getsize(output_path) == 13 + assert not tmp_path.exists() + + # Run an external command as in pull_image + output_path = outdir / "testfile2" + with intermediate_file(output_path) as tmp: + tmp_path = Path(tmp.name) + subprocess.check_call([f"echo 'Hello, World!' > {tmp_path}"], shell=True) + + assert (output_path).exists() + assert os.path.getsize(output_path) == 14 # Extra \n ! + assert not (tmp_path).exists() + + # Code that fails. The file shall not exist + + # Directly write to the file and raise an exception + output_path = outdir / "testfile3" + with pytest.raises(ValueError): + with intermediate_file(output_path) as tmp: + tmp_path = Path(tmp.name) + tmp.write(b"Hello, World!") + raise ValueError("This is a test error") + + assert not (output_path).exists() + assert not (tmp_path).exists() + + # Run an external command and raise an exception + output_path = outdir / "testfile4" + with pytest.raises(subprocess.CalledProcessError): + with intermediate_file(output_path) as tmp: + tmp_path = Path(tmp.name) + subprocess.check_call([f"echo 'Hello, World!' 
> {tmp_path}"], shell=True) + subprocess.check_call(["ls", "/dummy"]) + + assert not (output_path).exists() + assert not (tmp_path).exists() + + # Test for invalid output paths + with pytest.raises(DownloadError): + with intermediate_file(outdir) as tmp: + pass + + output_path = outdir / "testfile5" + os.symlink("/dummy", output_path) + with pytest.raises(DownloadError): + with intermediate_file(output_path) as tmp: + pass + + # + # Test for 'utils.DownloadProgress.add/update_main_task' + # + def test_download_progress_main_task(self): + with ContainerProgress() as progress: + # No task initially + assert progress.tasks == [] + + # Add a task, it should be there + task_id = progress.add_main_task(total=42) + assert task_id == 0 + assert len(progress.tasks) == 1 + assert progress.task_ids[0] == task_id + assert progress.tasks[0].total == 42 + + # Add another task, there should now be two + other_task_id = progress.add_task("Another task", total=28) + assert other_task_id == 1 + assert len(progress.tasks) == 2 + assert progress.task_ids[1] == other_task_id + assert progress.tasks[1].total == 28 + + progress.update_main_task(total=35) + assert progress.tasks[0].total == 35 + assert progress.tasks[1].total == 28 + + # + # Test for 'utils.DownloadProgress.sub_task' + # + def test_download_progress_sub_task(self): + with ContainerProgress() as progress: + # No task initially + assert progress.tasks == [] + + # Add a sub-task, it should be there + with progress.sub_task("Sub-task", total=42) as sub_task_id: + assert sub_task_id == 0 + assert len(progress.tasks) == 1 + assert progress.task_ids[0] == sub_task_id + assert progress.tasks[0].total == 42 + + # The sub-task should be gone now + assert progress.tasks == [] + + # Add another sub-task, this time that raises an exception + with pytest.raises(ValueError): + with progress.sub_task("Sub-task", total=28) as sub_task_id: + assert sub_task_id == 1 + assert len(progress.tasks) == 1 + assert progress.task_ids[0] == sub_task_id + assert progress.tasks[0].total == 28 + raise ValueError("This is a test error") + + # The sub-task should also be gone now + assert progress.tasks == [] + + # + # Test for 'utils.DownloadProgress.get_renderables' + # + def test_download_progress_renderables(self): + # Test the "summary" progress type + with ContainerProgress() as progress: + assert progress.tasks == [] + progress.add_task("Task 1", progress_type="summary", total=42, completed=11) + assert len(progress.tasks) == 1 + + renderable = progress.get_renderable() + assert isinstance(renderable, rich.console.Group), type(renderable) + + assert len(renderable.renderables) == 1 + table = renderable.renderables[0] + assert isinstance(table, rich.table.Table) + + assert isinstance(table.columns[0]._cells[0], str) + assert table.columns[0]._cells[0] == "[magenta]Task 1" + + assert isinstance(table.columns[1]._cells[0], rich.progress_bar.ProgressBar) + assert table.columns[1]._cells[0].completed == 11 + assert table.columns[1]._cells[0].total == 42 + + assert isinstance(table.columns[2]._cells[0], str) + assert table.columns[2]._cells[0] == "[progress.percentage] 26%" + + assert isinstance(table.columns[3]._cells[0], str) + assert table.columns[3]._cells[0] == "•" + + assert isinstance(table.columns[4]._cells[0], str) + assert table.columns[4]._cells[0] == "[green]11/42 tasks completed" + + # + # Test the SingularityProgress subclass + # + + # Test the "singularity_pull" progress type + with SingularityProgress() as progress: + assert progress.tasks == [] + 
progress.add_task( + "Task 1", progress_type="singularity_pull", total=42, completed=11, current_log="example log" + ) + assert len(progress.tasks) == 1 + + renderable = progress.get_renderable() + assert isinstance(renderable, rich.console.Group), type(renderable) + + assert len(renderable.renderables) == 1 + table = renderable.renderables[0] + assert isinstance(table, rich.table.Table) + + assert isinstance(table.columns[0]._cells[0], str) + assert table.columns[0]._cells[0] == "[magenta]Task 1" + + assert isinstance(table.columns[1]._cells[0], str) + assert table.columns[1]._cells[0] == "[blue]example log" + + assert isinstance(table.columns[2]._cells[0], rich.progress_bar.ProgressBar) + assert table.columns[2]._cells[0].completed == 11 + assert table.columns[2]._cells[0].total == 42 + + # Test the "download" progress type + with SingularityProgress() as progress: + assert progress.tasks == [] + progress.add_task("Task 1", progress_type="download", total=42, completed=11) + assert len(progress.tasks) == 1 + + renderable = progress.get_renderable() + assert isinstance(renderable, rich.console.Group), type(renderable) + + assert len(renderable.renderables) == 1 + table = renderable.renderables[0] + assert isinstance(table, rich.table.Table) + + assert isinstance(table.columns[0]._cells[0], str) + assert table.columns[0]._cells[0] == "[blue]Task 1" + + assert isinstance(table.columns[1]._cells[0], rich.progress_bar.ProgressBar) + assert table.columns[1]._cells[0].completed == 11 + assert table.columns[1]._cells[0].total == 42 + + assert isinstance(table.columns[2]._cells[0], str) + assert table.columns[2]._cells[0] == "[progress.percentage]26.2%" + + assert isinstance(table.columns[3]._cells[0], str) + assert table.columns[3]._cells[0] == "•" + + assert isinstance(table.columns[4]._cells[0], rich.text.Text) + assert table.columns[4]._cells[0]._text == ["11/42 bytes"] + + assert isinstance(table.columns[5]._cells[0], str) + assert table.columns[5]._cells[0] == "•" + + assert isinstance(table.columns[6]._cells[0], rich.text.Text) + assert table.columns[6]._cells[0]._text == ["?"] + + # + # Test the DockerProgress subclass + # + with DockerProgress() as progress: + assert progress.tasks == [] + progress.add_task( + "Task 1", progress_type="docker", total=2, completed=1, current_log="example log", status="Pulling" + ) + assert len(progress.tasks) == 1 + + renderable = progress.get_renderable() + assert isinstance(renderable, rich.console.Group), type(renderable) + + assert len(renderable.renderables) == 1 + table = renderable.renderables[0] + assert isinstance(table, rich.table.Table) + + assert isinstance(table.columns[0]._cells[0], str) + assert table.columns[0]._cells[0] == "[magenta]Task 1" + assert isinstance(table.columns[2]._cells[0], str) + assert table.columns[2]._cells[0] == "([blue]Pulling)" + + # + # Test for 'singularity.FileDownloader.download_file' + # + @with_temporary_folder + def test_file_download(self, outdir): + outdir = Path(outdir) + with ContainerProgress() as progress: + downloader = FileDownloader(progress) + + # Activate the caplog: all download attempts must be logged (even failed ones) + self._caplog.clear() + with self._caplog.at_level(logging.DEBUG): + # No task initially + assert progress.tasks == [] + assert progress._task_index == 0 + + # Download a file + src_url = "https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/sarscov2/genome/genome.fasta.fai" + output_path = outdir / Path(src_url).name + downloader.download_file(src_url, 
output_path) + assert (output_path).exists() + assert os.path.getsize(output_path) == 27 + assert ( + "nf_core.pipelines.download.singularity", + logging.DEBUG, + f"Downloading '{src_url}' to '{output_path}'", + ) in self._caplog.record_tuples + + # A task was added but is now gone + assert progress._task_index == 1 + assert progress.tasks == [] + + # No content at the URL + src_url = "http://www.google.com/generate_204" + output_path = outdir / Path(src_url).name + with pytest.raises(DownloadError): + downloader.download_file(src_url, output_path) + assert not (output_path).exists() + assert ( + "nf_core.pipelines.download.singularity", + logging.DEBUG, + f"Downloading '{src_url}' to '{output_path}'", + ) in self._caplog.record_tuples + + # A task was added but is now gone + assert progress._task_index == 2 + assert progress.tasks == [] + + # Invalid URL (schema) + src_url = "dummy://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/sarscov2/genome/genome.fasta.fax" + output_path = outdir / Path(src_url).name + with pytest.raises(requests.exceptions.InvalidSchema): + downloader.download_file(src_url, output_path) + assert not (output_path).exists() + assert ( + "nf_core.pipelines.download.singularity", + logging.DEBUG, + f"Downloading '{src_url}' to '{output_path}'", + ) in self._caplog.record_tuples + + # A task was added but is now gone + assert progress._task_index == 3 + assert progress.tasks == [] + + # Fire in the hole ! The download will be aborted and no output file will be created + src_url = "https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/sarscov2/genome/genome.fasta.fai" + output_path = outdir / Path(src_url).name + os.unlink(output_path) + downloader.kill_with_fire = True + with pytest.raises(KeyboardInterrupt): + downloader.download_file(src_url, output_path) + assert not (output_path).exists() + + # + # Test for 'singularity.FileDownloader.download_files_in_parallel' + # + @with_temporary_folder + def test_parallel_downloads(self, outdir): + outdir = Path(outdir) + + # Prepare the download paths + def make_tuple(url): + return (url, (outdir / Path(url).name)) + + download_fai = make_tuple( + "https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/sarscov2/genome/genome.fasta.fai" + ) + download_dict = make_tuple( + "https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/sarscov2/genome/genome.dict" + ) + download_204 = make_tuple("http://www.google.com/generate_204") + download_schema = make_tuple( + "dummy://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/sarscov2/genome/genome.fasta.fax" + ) + + with ContainerProgress() as progress: + downloader = FileDownloader(progress) + + # Download two files + assert downloader.kill_with_fire is False + downloads = [download_fai, download_dict] + downloaded_files = downloader.download_files_in_parallel(downloads, parallel_downloads=1) + assert len(downloaded_files) == 2 + assert downloaded_files == downloads + assert (download_fai[1]).exists() + assert (download_dict[1]).exists() + assert downloader.kill_with_fire is False + (download_fai[1]).unlink() + (download_dict[1]).unlink() + + # This time, the second file will raise an exception + assert downloader.kill_with_fire is False + downloads = [download_fai, download_204] + with pytest.raises(DownloadError): + downloader.download_files_in_parallel(downloads, parallel_downloads=1) + assert downloader.kill_with_fire is False + assert (download_fai[1]).exists() + assert 
not (download_204[1]).exists() + (download_fai[1]).unlink() + + # Now we swap the two files. The first one will raise an exception but the + # second one will still be downloaded because only KeyboardInterrupt can + # stop everything altogether. + assert downloader.kill_with_fire is False + downloads = [download_204, download_fai] + with pytest.raises(DownloadError): + downloader.download_files_in_parallel(downloads, parallel_downloads=1) + assert downloader.kill_with_fire is False + assert (download_fai[1]).exists() + assert not (download_204[1]).exists() + (download_fai[1]).unlink() + + # We check that the same behaviour applies to `requests` errors. + assert downloader.kill_with_fire is False + downloads = [download_schema, download_fai] + with pytest.raises(DownloadError): + downloader.download_files_in_parallel(downloads, parallel_downloads=1) + assert downloader.kill_with_fire is False + assert (download_fai[1]).exists() + assert not (download_schema[1]).exists() + (download_fai[1]).unlink() + + # Now we check the callback method + callbacks = [] + + def callback(*args): + callbacks.append(args) + + # We check the same scenarios as above + callbacks = [] + downloads = [download_fai, download_dict] + downloader.download_files_in_parallel(downloads, parallel_downloads=1, callback=callback) + assert len(callbacks) == 2 + assert callbacks == [ + (download_fai, FileDownloader.Status.DONE), + (download_dict, FileDownloader.Status.DONE), + ] + + callbacks = [] + downloads = [download_fai, download_204] + with pytest.raises(DownloadError): + downloader.download_files_in_parallel(downloads, parallel_downloads=1, callback=callback) + assert len(callbacks) == 2 + assert callbacks == [ + (download_fai, FileDownloader.Status.DONE), + (download_204, FileDownloader.Status.ERROR), + ] + + callbacks = [] + downloads = [download_204, download_fai] + with pytest.raises(DownloadError): + downloader.download_files_in_parallel(downloads, parallel_downloads=1, callback=callback) + assert len(callbacks) == 2 + assert callbacks == [ + (download_204, FileDownloader.Status.ERROR), + (download_fai, FileDownloader.Status.DONE), + ] + + callbacks = [] + downloads = [download_schema, download_fai] + with pytest.raises(DownloadError): + downloader.download_files_in_parallel(downloads, parallel_downloads=1, callback=callback) + assert len(callbacks) == 2 + assert callbacks == [ + (download_schema, FileDownloader.Status.ERROR), + (download_fai, FileDownloader.Status.DONE), + ] + + # Finally, we check how the function behaves when a KeyboardInterrupt is raised + with mock.patch("concurrent.futures.wait", side_effect=KeyboardInterrupt): + callbacks = [] + downloads = [download_fai, download_204, download_dict] + with pytest.raises(KeyboardInterrupt): + downloader.download_files_in_parallel(downloads, parallel_downloads=1, callback=callback) + assert len(callbacks) == 3 + # Note: when the KeyboardInterrupt is raised, download_204 and download_dict are not yet started. + # They are therefore cancelled and pushed to the callback list immediately. download_fai is last + # because it is running and can't be cancelled.
+ assert callbacks == [ + (download_204, FileDownloader.Status.CANCELLED), + (download_dict, FileDownloader.Status.CANCELLED), + (download_fai, FileDownloader.Status.ERROR), + ] + + class DownloadTest(unittest.TestCase): @pytest.fixture(autouse=True) def use_caplog(self, caplog): @@ -55,7 +532,7 @@ def test_get_release_hash_release(self): ) = nf_core.utils.get_repo_releases_branches(pipeline, wfs) download_obj.get_revision_hash() assert download_obj.wf_sha[download_obj.revision[0]] == "b3e5e3b95aaf01d98391a62a10a3990c0a4de395" - assert download_obj.outdir == "nf-core-methylseq_1.6" + assert download_obj.outdir == Path("nf-core-methylseq_1.6") assert ( download_obj.wf_download_url[download_obj.revision[0]] == "https://github.com/nf-core/methylseq/archive/b3e5e3b95aaf01d98391a62a10a3990c0a4de395.zip" @@ -74,7 +551,7 @@ def test_get_release_hash_branch(self): ) = nf_core.utils.get_repo_releases_branches(pipeline, wfs) download_obj.get_revision_hash() assert download_obj.wf_sha[download_obj.revision[0]] == "819cbac792b76cf66c840b567ed0ee9a2f620db7" - assert download_obj.outdir == "nf-core-exoseq_dev" + assert download_obj.outdir == Path("nf-core-exoseq_dev") assert ( download_obj.wf_download_url[download_obj.revision[0]] == "https://github.com/nf-core/exoseq/archive/819cbac792b76cf66c840b567ed0ee9a2f620db7.zip" @@ -95,7 +572,7 @@ def test_get_release_hash_long_commit(self): ) = nf_core.utils.get_repo_releases_branches(pipeline, wfs) download_obj.get_revision_hash() assert download_obj.wf_sha[download_obj.revision[0]] == revision - assert download_obj.outdir == f"nf-core-exoseq_{revision}" + assert download_obj.outdir == Path(f"nf-core-exoseq_{revision}") assert ( download_obj.wf_download_url[download_obj.revision[0]] == f"https://github.com/nf-core/exoseq/archive/{revision}.zip" @@ -118,7 +595,7 @@ def test_get_release_hash_short_commit(self): download_obj.get_revision_hash() print(download_obj) assert download_obj.wf_sha[download_obj.revision[0]] == revision - assert download_obj.outdir == f"nf-core-exoseq_{short_rev}" + assert download_obj.outdir == Path(f"nf-core-exoseq_{short_rev}") assert ( download_obj.wf_download_url[download_obj.revision[0]] == f"https://github.com/nf-core/exoseq/archive/{revision}.zip" @@ -142,6 +619,7 @@ def test_get_release_hash_non_existent_release(self): # @with_temporary_folder def test_download_wf_files(self, outdir): + outdir = Path(outdir) download_obj = DownloadWorkflow(pipeline="nf-core/methylseq", revision="1.6") download_obj.outdir = outdir download_obj.wf_sha = {"1.6": "b3e5e3b95aaf01d98391a62a10a3990c0a4de395"} @@ -153,25 +631,28 @@ def test_download_wf_files(self, outdir): download_obj.wf_sha[download_obj.revision[0]], download_obj.wf_download_url[download_obj.revision[0]], ) - assert os.path.exists(os.path.join(outdir, rev, "main.nf")) + + assert ((outdir / rev) / "main.nf").exists() # # Tests for 'download_configs' # @with_temporary_folder def test_download_configs(self, outdir): + outdir = Path(outdir) download_obj = DownloadWorkflow(pipeline="nf-core/methylseq", revision="1.6") download_obj.outdir = outdir download_obj.download_configs() - assert os.path.exists(os.path.join(outdir, "configs", "nfcore_custom.config")) + assert ((outdir / "configs") / "nfcore_custom.config").exists() # # Tests for 'wf_use_local_configs' # @with_temporary_folder def test_wf_use_local_configs(self, tmp_path): + tmp_path = Path(tmp_path) # Get a workflow and configs - test_pipeline_dir = os.path.join(tmp_path, "nf-core-testpipeline") + test_pipeline_dir = tmp_path /
"nf-core-testpipeline" create_obj = nf_core.pipelines.create.create.PipelineCreate( "testpipeline", "This is a test pipeline", @@ -192,256 +673,84 @@ def test_wf_use_local_configs(self, tmp_path): assert wf_config["params.custom_config_base"] == f"{test_outdir}/workflow/../configs/" # - # Tests for 'find_container_images' - # - @with_temporary_folder - @mock.patch("nf_core.utils.fetch_wf_config") - def test_find_container_images_config_basic(self, tmp_path, mock_fetch_wf_config): - download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_path) - mock_fetch_wf_config.return_value = { - "process.mapping.container": "cutting-edge-container", - "process.nocontainer": "not-so-cutting-edge", - } - download_obj.find_container_images("workflow") - assert len(download_obj.containers) == 1 - assert download_obj.containers[0] == "cutting-edge-container" - - # - # Test for 'find_container_images' in config with nextflow + # Test that `find_container_images` (uses `nextflow inspect`) and `find_container_images_legacy` + # produces the same results # @pytest.mark.skipif( - shutil.which("nextflow") is None, + shutil.which("nextflow") is None or not check_nextflow_version(NF_INSPECT_MIN_NF_VERSION), reason="Can't run test that requires nextflow to run if not installed.", ) @with_temporary_folder @mock.patch("nf_core.utils.fetch_wf_config") - def test__find_container_images_config_nextflow(self, tmp_path, mock_fetch_wf_config): - download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_path) - result = run_cmd("nextflow", f"config -flat {TEST_DATA_DIR}'/mock_config_containers'") - if result is not None: - nfconfig_raw, _ = result - config = {} - nfconfig = nfconfig_raw.decode("utf-8") - multiline_key_value_pattern = re.compile(r"(^|\n)([^\n=]+?)\s*=\s*((?:(?!\n[^\n=]+?\s*=).)*)", re.DOTALL) - - for match in multiline_key_value_pattern.finditer(nfconfig): - k = match.group(2).strip() - v = match.group(3).strip().strip("'\"") - if k and v: - config[k] = v - mock_fetch_wf_config.return_value = config - download_obj.find_container_images("workflow") - assert "nfcore/methylseq:1.0" in download_obj.containers - assert "nfcore/methylseq:1.4" in download_obj.containers - assert "nfcore/sarek:dev" in download_obj.containers - assert ( - "https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1" in download_obj.containers - ) - assert ( - "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/06/06beccfa4d48e5daf30dd8cee4f7e06fd51594963db0d5087ab695365b79903b/data" - in download_obj.containers - ) - assert ( - "community.wave.seqera.io/library/last_samtools_open-fonts:176a6ab0c8171057" in download_obj.containers - ) - assert "singularity" not in download_obj.containers - # does not yet pick up nfcore/sarekvep:dev.${params.genome}, because that is no valid URL or Docker URI. 
- - # - # Test for 'find_container_images' in modules - # - @with_temporary_folder - @mock.patch("nf_core.utils.fetch_wf_config") - def test_find_container_images_modules(self, tmp_path, mock_fetch_wf_config): - download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_path) + def test_containers_pipeline_singularity(self, tmp_path, mock_fetch_wf_config): + tmp_path = Path(tmp_path) + assert check_nextflow_version(NF_INSPECT_MIN_NF_VERSION) is True + + # Set up test + container_system = "singularity" + mock_pipeline_dir = TEST_DATA_DIR / "mock_pipeline_containers" + refererence_json_dir = mock_pipeline_dir / "per_profile_output" + # First check that `-profile singularity` produces the same output as the reference + download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_path, container_system=container_system) mock_fetch_wf_config.return_value = {} - download_obj.find_container_images(str(Path(TEST_DATA_DIR, "mock_module_containers"))) - # mock_docker_single_quay_io.nf - assert "quay.io/biocontainers/singlequay:1.9--pyh9f0ad1d_0" in download_obj.containers + # Run get containers with `nextflow inspect` + entrypoint = "main_passing_test.nf" + download_obj.find_container_images(mock_pipeline_dir, "dummy-revision", entrypoint=entrypoint) - # mock_dsl2_apptainer_var1.nf (possible future convention?) - assert ( - "https://depot.galaxyproject.org/singularity/dsltwoapptainervarone:1.1.0--py38h7be5676_2" - in download_obj.containers - ) - assert "biocontainers/dsltwoapptainervarone:1.1.0--py38h7be5676_2" not in download_obj.containers + # Store the containers found by the new method + found_containers = set(download_obj.containers) - # mock_dsl2_apptainer_var2.nf (possible future convention?) - assert ( - "https://depot.galaxyproject.org/singularity/dsltwoapptainervartwo:1.1.0--hdfd78af_0" - in download_obj.containers - ) - assert "biocontainers/dsltwoapptainervartwo:1.1.0--hdfd78af_0" not in download_obj.containers - - # mock_dsl2_current_inverted.nf (new implementation supports if the direct download URL is listed after Docker URI) - assert ( - "https://depot.galaxyproject.org/singularity/dsltwocurrentinv:3.3.2--h1b792b2_1" in download_obj.containers - ) - assert "biocontainers/dsltwocurrentinv:3.3.2--h1b792b2_1" not in download_obj.containers - - # mock_dsl2_current.nf (main nf-core convention, should be the one in far the most modules) - assert ( - "https://depot.galaxyproject.org/singularity/dsltwocurrent:1.2.1--pyhdfd78af_0" in download_obj.containers - ) - assert "biocontainers/dsltwocurrent:1.2.1--pyhdfd78af_0" not in download_obj.containers - - # mock_dsl2_old.nf (initial DSL2 convention) - assert "https://depot.galaxyproject.org/singularity/dsltwoold:0.23.0--0" in download_obj.containers - assert "quay.io/biocontainers/dsltwoold:0.23.0--0" not in download_obj.containers - - # mock_dsl2_variable.nf (currently the edgiest edge case supported) - assert ( - "https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0" - in download_obj.containers - ) - assert ( - "https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:afaaa4c6f5b308b4b6aa2dd8e99e1466b2a6b0cd-0" - in download_obj.containers - ) - assert ( - "quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0" - not in download_obj.containers - ) - assert ( - 
"quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:afaaa4c6f5b308b4b6aa2dd8e99e1466b2a6b0cd-0" - not in download_obj.containers - ) - - # mock_seqera_container_oras.nf - assert "oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6" in download_obj.containers - assert "community.wave.seqera.io/library/umi-transfer:1.0.0--d30e8812ea280fa1" not in download_obj.containers - - # mock_seqera_container_oras_mulled.nf - assert ( - "oras://community.wave.seqera.io/library/umi-transfer_umicollapse:796a995ff53da9e3" - in download_obj.containers - ) - assert ( - "community.wave.seqera.io/library/umi-transfer_umicollapse:3298d4f1b49e33bd" not in download_obj.containers - ) + # Load the reference containers + with open(refererence_json_dir / f"{container_system}_containers.json") as fh: + ref_containers = json.load(fh) + ref_container_strs = set(ref_containers.values()) - # mock_seqera_container_http.nf - assert ( - "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data" - in download_obj.containers + # Now check that they contain the same containers + assert found_containers == ref_container_strs, ( + f"Containers found in pipeline by `nextflow inspect`: {found_containers}\n" + f"Containers that should've been found: {ref_container_strs}" ) - # ToDO: This URI should actually NOT be in there, but prioritize_direct_download() can not handle this case. - # - # It works purely by comparing the strings, thus can establish the equivalence of 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.5--py39hf95cd2a_0' - # and 'biocontainers/umi_tools:1.1.5--py39hf95cd2a_0' because of the identical string 'umi_tools:1.1.5--py39hf95cd2a_0', but has no means to establish that - # 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data' and - # 'community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264' are the equivalent container. It would need to query an API at Seqera for that. 
- - assert "community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264" in download_obj.containers - # - # Test for 'prioritize_direct_download' + # Test that `find_container_images` (uses `nextflow inspect`) and `find_container_images_legacy` + # produces the same results # + @pytest.mark.skipif( + shutil.which("nextflow") is None or not check_nextflow_version(NF_INSPECT_MIN_NF_VERSION), + reason=f"Can't run test that requires Nextflow >= {NF_INSPECT_MIN_NF_VERSION} to run if not installed.", + ) @with_temporary_folder - def test_prioritize_direct_download(self, tmp_path): - download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_path) - - # tests deduplication and https priority as well as Seqera Container exception - - test_container = [ - "https://depot.galaxyproject.org/singularity/ubuntu:22.04", - "nf-core/ubuntu:22.04", - "biocontainers/umi-transfer:1.5.0--h715e4b3_0", - "https://depot.galaxyproject.org/singularity/umi-transfer:1.5.0--h715e4b3_0", - "biocontainers/umi-transfer:1.5.0--h715e4b3_0", - "quay.io/nf-core/sortmerna:4.3.7--6502243397c065ba", - "nf-core/sortmerna:4.3.7--6502243397c065ba", - "https://depot.galaxyproject.org/singularity/sortmerna:4.3.7--hdbdd923_1", - "https://depot.galaxyproject.org/singularity/sortmerna:4.3.7--hdbdd923_0", - "https://depot.galaxyproject.org/singularity/sortmerna:4.2.0--h9ee0642_1", - "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data", - "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data", - "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data", - ] - - result = download_obj.prioritize_direct_download(test_container) - - # Verify that the priority works for regular https downloads (https encountered first) - assert "https://depot.galaxyproject.org/singularity/ubuntu:22.04" in result - assert "nf-core/ubuntu:22.04" not in result + @mock.patch("nf_core.utils.fetch_wf_config") + def test_containers_pipeline_docker(self, tmp_path, mock_fetch_wf_config): + tmp_path = Path(tmp_path) + assert check_nextflow_version(NF_INSPECT_MIN_NF_VERSION) is True + + # Set up test + container_system = "docker" + mock_pipeline_dir = TEST_DATA_DIR / "mock_pipeline_containers" + refererence_json_dir = mock_pipeline_dir / "per_profile_output" + # First check that `-profile singularity` produces the same output as the reference + download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_path, container_system=container_system) + mock_fetch_wf_config.return_value = {} - # Verify that the priority works for regular https downloads (https encountered second) - assert "biocontainers/umi-transfer:1.5.0--h715e4b3_0" not in result - assert "https://depot.galaxyproject.org/singularity/umi-transfer:1.5.0--h715e4b3_0" in result + # Run get containers with `nextflow inspect` + entrypoint = "main_passing_test.nf" + download_obj.find_container_images(mock_pipeline_dir, "dummy-revision", entrypoint=entrypoint) - # Verify that the priority works for images with and without explicit registry - # No priority here, though - the first is retained. 
- assert "nf-core/sortmerna:4.3.7--6502243397c065ba" in result - assert "quay.io/nf-core/sortmerna:4.3.7--6502243397c065ba" not in result + # Store the containers found by the new method + found_containers = set(download_obj.containers) - # Verify that different versions of the same tool and different build numbers are retained - assert "https://depot.galaxyproject.org/singularity/sortmerna:4.3.7--hdbdd923_1" in result - assert "https://depot.galaxyproject.org/singularity/sortmerna:4.3.7--hdbdd923_0" in result - assert "https://depot.galaxyproject.org/singularity/sortmerna:4.2.0--h9ee0642_1" in result + # Load the reference containers + with open(refererence_json_dir / f"{container_system}_containers.json") as fh: + ref_containers = json.load(fh) + ref_container_strs = set(ref_containers.values()) - # Verify that Seqera containers are not deduplicated... - assert ( - "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data" - in result - ) - assert ( - "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data" - in result + # Now check that they contain the same containers + assert found_containers == ref_container_strs, ( + f"Containers found in pipeline by `nextflow inspect`: {found_containers}\n" + f"Containers that should've been found: {ref_container_strs}" ) - # ...but identical ones are. - assert ( - result.count( - "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data" - ) - == 1 - ) - - # - # Test for 'reconcile_seqera_container_uris' - # - @with_temporary_folder - def test_reconcile_seqera_container_uris(self, tmp_path): - download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_path) - - prioritized_container = [ - "oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6", - "oras://community.wave.seqera.io/library/sylph:0.6.1--b97274cdc1caa649", - ] - - test_container = [ - "https://depot.galaxyproject.org/singularity/ubuntu:22.04", - "nf-core/ubuntu:22.04", - "nf-core/ubuntu:22.04", - "nf-core/ubuntu:22.04", - "community.wave.seqera.io/library/umi-transfer:1.5.0--73c1a6b65e5b0b81", - "community.wave.seqera.io/library/sylph:0.6.1--a21713a57a65a373", - "biocontainers/sylph:0.6.1--b97274cdc1caa649", - ] - - # test that the test_container list is returned as it is, if no prioritized_containers are specified - result_empty = download_obj.reconcile_seqera_container_uris([], test_container) - assert result_empty == test_container - - result = download_obj.reconcile_seqera_container_uris(prioritized_container, test_container) - - # Verify that unrelated images are retained - assert "https://depot.galaxyproject.org/singularity/ubuntu:22.04" in result - assert "nf-core/ubuntu:22.04" in result - - # Verify that the priority works for regular Seqera container (Native Singularity over Docker, but only for Seqera registry) - assert "oras://community.wave.seqera.io/library/sylph:0.6.1--b97274cdc1caa649" in result - assert "community.wave.seqera.io/library/sylph:0.6.1--a21713a57a65a373" not in result - assert "biocontainers/sylph:0.6.1--b97274cdc1caa649" in result - - # Verify that version strings are respected: Version 1.0.0 does not replace version 1.5.0 - assert "oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6" in result - assert 
"community.wave.seqera.io/library/umi-transfer:1.5.0--73c1a6b65e5b0b81" in result - - # assert that the deduplication works - assert test_container.count("nf-core/ubuntu:22.04") == 3 - assert result.count("nf-core/ubuntu:22.04") == 1 # # Tests for 'singularity_pull_image' @@ -449,140 +758,299 @@ def test_reconcile_seqera_container_uris(self, tmp_path): # If Singularity is installed, but the container can't be accessed because it does not exist or there are access # restrictions, a RuntimeWarning is raised due to the unavailability of the image. @pytest.mark.skipif( - shutil.which("singularity") is None, + shutil.which("singularity") is None and shutil.which("apptainer") is None, reason="Can't test what Singularity does if it's not installed.", ) @with_temporary_folder - @mock.patch("rich.progress.Progress.add_task") - def test_singularity_pull_image_singularity_installed(self, tmp_dir, mock_rich_progress): - download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_dir) - - # Test successful pull - download_obj.singularity_pull_image( - "hello-world", f"{tmp_dir}/hello-world.sif", None, "docker.io", mock_rich_progress + @mock.patch("nf_core.pipelines.download.singularity.SingularityProgress") + @mock.patch( + "nf_core.pipelines.download.singularity.SingularityFetcher.prompt_singularity_cachedir_creation" + ) # This is to make sure that we do not prompt for a Singularity cachedir + def test_singularity_pull_image_singularity_installed(self, tmp_dir, mock_cachedir_prompt, mock_progress): + tmp_dir = Path(tmp_dir) + singularity_fetcher = SingularityFetcher( + outdir=tmp_dir, + container_library=[], + registry_set=[], + container_cache_utilisation="none", + container_cache_index=None, ) + singularity_fetcher.check_and_set_implementation() + singularity_fetcher.progress = mock_progress() + # Test successful pull + assert singularity_fetcher.pull_image("hello-world", tmp_dir / "hello-world.sif", "docker.io") is True # Pull again, but now the image already exists - with pytest.raises(ContainerError.ImageExistsError): - download_obj.singularity_pull_image( - "hello-world", f"{tmp_dir}/hello-world.sif", None, "docker.io", mock_rich_progress - ) + assert singularity_fetcher.pull_image("hello-world", tmp_dir / "hello-world.sif", "docker.io") is False # Test successful pull with absolute URI (use tiny 3.5MB test container from the "Kogia" project: https://github.com/bschiffthaler/kogia) - download_obj.singularity_pull_image( - "docker.io/bschiffthaler/sed", f"{tmp_dir}/sed.sif", None, "docker.io", mock_rich_progress - ) + assert singularity_fetcher.pull_image("docker.io/bschiffthaler/sed", tmp_dir / "sed.sif", "docker.io") is True # Test successful pull with absolute oras:// URI - download_obj.singularity_pull_image( - "oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6", - f"{tmp_dir}/umi-transfer-oras.sif", - None, - "docker.io", - mock_rich_progress, + assert ( + singularity_fetcher.pull_image( + "oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6", + tmp_dir / "umi-transfer-oras.sif", + "docker.io", + ) + is True ) # try pulling Docker container image with oras:// - with pytest.raises(ContainerError.NoSingularityContainerError): - download_obj.singularity_pull_image( + with pytest.raises(SingularityError.NoSingularityContainerError): + singularity_fetcher.pull_image( "oras://ghcr.io/matthiaszepper/umi-transfer:dev", - f"{tmp_dir}/umi-transfer-oras_impostor.sif", - None, + tmp_dir / "umi-transfer-oras_impostor.sif", "docker.io", - 
mock_rich_progress, ) # try to pull from non-existing registry (Name change hello-world_new.sif is needed, otherwise ImageExistsError is raised before attempting to pull.) - with pytest.raises(ContainerError.RegistryNotFoundError): - download_obj.singularity_pull_image( + with pytest.raises(SingularityError.RegistryNotFoundError): + singularity_fetcher.pull_image( "hello-world", - f"{tmp_dir}/break_the_registry_test.sif", - None, + tmp_dir / "break_the_registry_test.sif", "register-this-domain-to-break-the-test.io", - mock_rich_progress, ) # test Image not found for several registries - with pytest.raises(ContainerError.ImageNotFoundError): - download_obj.singularity_pull_image( - "a-container", f"{tmp_dir}/acontainer.sif", None, "quay.io", mock_rich_progress - ) + with pytest.raises(SingularityError.ImageNotFoundError): + singularity_fetcher.pull_image("a-container", tmp_dir / "acontainer.sif", "quay.io") - with pytest.raises(ContainerError.ImageNotFoundError): - download_obj.singularity_pull_image( - "a-container", f"{tmp_dir}/acontainer.sif", None, "docker.io", mock_rich_progress - ) + with pytest.raises(SingularityError.ImageNotFoundError): + singularity_fetcher.pull_image("a-container", tmp_dir / "acontainer.sif", "docker.io") - with pytest.raises(ContainerError.ImageNotFoundError): - download_obj.singularity_pull_image( - "a-container", f"{tmp_dir}/acontainer.sif", None, "ghcr.io", mock_rich_progress - ) + with pytest.raises(SingularityError.ImageNotFoundError): + singularity_fetcher.pull_image("a-container", tmp_dir / "acontainer.sif", "ghcr.io") # test Image not found for absolute URI. - with pytest.raises(ContainerError.ImageNotFoundError): - download_obj.singularity_pull_image( + with pytest.raises(SingularityError.ImageNotFoundError): + singularity_fetcher.pull_image( "docker.io/bschiffthaler/nothingtopullhere", - f"{tmp_dir}/nothingtopullhere.sif", - None, + tmp_dir / "nothingtopullhere.sif", "docker.io", - mock_rich_progress, ) # Traffic from Github Actions to GitHub's Container Registry is unlimited, so no harm should be done here. - with pytest.raises(ContainerError.InvalidTagError): - download_obj.singularity_pull_image( + with pytest.raises(SingularityError.InvalidTagError): + singularity_fetcher.pull_image( "ewels/multiqc:go-rewrite", - f"{tmp_dir}/multiqc-go.sif", - None, + tmp_dir / "multiqc-go.sif", "ghcr.io", - mock_rich_progress, ) + # + # Tests for 'DockerFetcher.pull_image' + # + # If Docker is installed, but the container can't be accessed because it does not exist or there are access + # restrictions, a RuntimeWarning is raised due to the unavailability of the image. 
+ @pytest.mark.skipif( + shutil.which("docker") is None, + reason="Can't test what Docker does if it's not installed.", + ) + @with_temporary_folder + @mock.patch("nf_core.pipelines.download.docker.DockerProgress") + @mock.patch("rich.progress.Task") + def test_docker_pull_image_docker_installed(self, tmp_dir, mock_progress, mock_task): + tmp_dir = Path(tmp_dir) + docker_fetcher = DockerFetcher( + outdir=tmp_dir, + container_library=[], + registry_set=[], + ) + docker_fetcher.progress = mock_progress() + mock_task_obj = mock_task() + + # Test successful pull + docker_fetcher.pull_image("hello-world", mock_task_obj) + + # Test successful pull with absolute URI (use tiny 3.5MB test container from the "Kogia" project: https://github.com/bschiffthaler/kogia) + docker_fetcher.pull_image("docker.io/bschiffthaler/sed", mock_task_obj) + + # Test successful pull from wave + docker_fetcher.pull_image( + "community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6", mock_task_obj + ) + + # test image not found for several registries + with pytest.raises(DockerError.ImageNotFoundError): + docker_fetcher.pull_image("ghcr.io/not-a-real-registry/this-container-does-not-exist", mock_task_obj) + + with pytest.raises(DockerError.ImageNotFoundError): + docker_fetcher.pull_image("docker.io/not-a-real-registry/this-container-does-not-exist", mock_task_obj) + + # test image not found for absolute URI. + with pytest.raises(DockerError.ImageNotFoundError): + docker_fetcher.pull_image("docker.io/bschiffthaler/nothingtopullhere", mock_task_obj) + + # Traffic from Github Actions to GitHub's Container Registry is unlimited, so no harm should be done here. + with pytest.raises(DockerError.InvalidTagError): + docker_fetcher.pull_image("ghcr.io/ewels/multiqc:go-rewrite", mock_task_obj) + + # + # Tests for 'SingularityFetcher.pull_image' + # @pytest.mark.skipif( - shutil.which("singularity") is None, + shutil.which("singularity") is None and shutil.which("apptainer") is None, reason="Can't test what Singularity does if it's not installed.", ) @with_temporary_folder - @mock.patch("rich.progress.Progress.add_task") - def test_singularity_pull_image_successfully(self, tmp_dir, mock_rich_progress): - download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_dir) - download_obj.singularity_pull_image( - "hello-world", f"{tmp_dir}/yet-another-hello-world.sif", None, "docker.io", mock_rich_progress + @mock.patch("nf_core.pipelines.download.singularity.SingularityProgress") + @mock.patch( + "nf_core.pipelines.download.singularity.SingularityFetcher.prompt_singularity_cachedir_creation" + ) # This is to make sure that we do not prompt for a Singularity cachedir + def test_singularity_pull_image_successfully(self, tmp_dir, mock_cachedir_prompt, mock_progress): + tmp_dir = Path(tmp_dir) + singularity_fetcher = SingularityFetcher( + outdir=tmp_dir, + container_library=[], + registry_set=[], + container_cache_utilisation="none", + container_cache_index=None, + ) + singularity_fetcher.check_and_set_implementation() + singularity_fetcher.progress = mock_progress() + singularity_fetcher.pull_image("hello-world", tmp_dir / "yet-another-hello-world.sif", "docker.io") + + # + # Tests for 'DockerFetcher.pull_and_save_image' + # + @pytest.mark.skipif( + shutil.which("docker") is None, + reason="Can't test what Docker does if it's not installed.", + ) + @with_temporary_folder + @mock.patch("nf_core.pipelines.download.docker.DockerProgress") + def test_docker_pull_image_successfully(self, tmp_dir, mock_progress): + tmp_dir = 
Path(tmp_dir) + docker_fetcher = DockerFetcher( + outdir=tmp_dir, + container_library=[], + registry_set=[], + ) + docker_fetcher.progress = mock_progress() + docker_fetcher.pull_and_save_image("hello-world", tmp_dir / "hello-world.tar") + + # + # Tests for 'DockerFetcher.save_image' + # + @pytest.mark.skipif( + shutil.which("docker") is None, + reason="Can't test what Docker does if it's not installed.", + ) + @with_temporary_folder + @mock.patch("nf_core.pipelines.download.docker.DockerProgress") + @mock.patch("rich.progress.Task") + def test_docker_save_image(self, tmp_dir, mock_progress, mock_task): + tmp_dir = Path(tmp_dir) + docker_fetcher = DockerFetcher( + outdir=tmp_dir, + container_library=[], + registry_set=[], ) + docker_fetcher.progress = mock_progress() + mock_task_obj = mock_task() + with pytest.raises(DockerError.ImageNotPulledError): + docker_fetcher.save_image( + "this-image-cannot-possibly-be-pulled-to-this-machine:latest", + tmp_dir / "this-image-cannot-possibly-be-pulled-to-this-machine.tar", + mock_task_obj, + ) # - # Tests for 'get_singularity_images' + # + # Tests for 'SingularityFetcher.fetch_containers' # @pytest.mark.skipif( - shutil.which("singularity") is None, + shutil.which("singularity") is None and shutil.which("apptainer") is None, reason="Can't test what Singularity does if it's not installed.", ) @with_temporary_folder @mock.patch("nf_core.utils.fetch_wf_config") - def test_get_singularity_images(self, tmp_path, mock_fetch_wf_config): + @mock.patch( + "nf_core.pipelines.download.singularity.SingularityFetcher.prompt_singularity_cachedir_creation" + ) # This is to make sure that we do not prompt for a Singularity cachedir + def test_fetch_containers_singularity(self, tmp_path, mock_cachedir_prompt, mock_fetch_wf_config): + tmp_path = Path(tmp_path) download_obj = DownloadWorkflow( pipeline="dummy", outdir=tmp_path, container_library=("mirage-the-imaginative-registry.io", "quay.io", "ghcr.io", "docker.io"), + container_system="singularity", ) - mock_fetch_wf_config.return_value = { - "process.helloworld.container": "helloworld", - "process.hellooworld.container": "helloooooooworld", - "process.mapping.container": "ewels/multiqc:gorewrite", - } - download_obj.find_container_images("workflow") + download_obj.containers = [ + "helloworld", + "helloooooooworld", + "ewels/multiqc:gorewrite", + ] assert len(download_obj.container_library) == 4 # This list of fake container images should produce all kinds of ContainerErrors. - # Test that they are all caught inside get_singularity_images(). - download_obj.get_singularity_images() + # Test that they are all caught inside SingularityFetcher.fetch_containers(). 
+ singularity_fetcher = SingularityFetcher( + outdir=tmp_path, + container_library=download_obj.container_library, + registry_set=download_obj.registry_set, + container_cache_utilisation="none", + container_cache_index=None, + ) + singularity_fetcher.fetch_containers( + download_obj.containers, + download_obj.containers_remote, + ) + # + # + # Tests for 'DockerFetcher.fetch_containers' + # + @pytest.mark.skipif( + shutil.which("singularity") is None and shutil.which("apptainer") is None, + reason="Can't test what Singularity does if it's not installed.", + ) @with_temporary_folder - @mock.patch("os.makedirs") + @mock.patch("nf_core.utils.fetch_wf_config") + def test_fetch_containers_docker(self, tmp_path, mock_fetch_wf_config): + tmp_path = Path(tmp_path) + download_obj = DownloadWorkflow( + pipeline="dummy", + outdir=tmp_path, + container_library=None, + container_system="docker", + ) + download_obj.containers = [ + "helloworld", + "helloooooooworld", + "ewels/multiqc:gorewrite", + ] + # This list of fake container images should produce all kinds of ContainerErrors. + # Test that they are all caught inside DockerFetcher.fetch_containers(). + docker_fetcher = DockerFetcher( + outdir=tmp_path, + container_library=download_obj.container_library, + registry_set=download_obj.registry_set, + ) + docker_fetcher.fetch_containers( + download_obj.containers, + download_obj.containers_remote, + ) + + # + # Tests for 'singularity.symlink_registries' function + # + + # Simple file name with no registry in it + @with_temporary_folder + @mock.patch( + "nf_core.pipelines.download.singularity.SingularityFetcher.check_and_set_implementation" + ) # This is to make sure that we do not check for Singularity/Apptainer installation + @mock.patch( + "nf_core.pipelines.download.singularity.SingularityFetcher.prompt_singularity_cachedir_creation" + ) # This is to make sure that we do not prompt for a Singularity cachedir + @mock.patch("pathlib.Path.mkdir") + @mock.patch("pathlib.Path.symlink_to") @mock.patch("os.symlink") @mock.patch("os.open") @mock.patch("os.close") - @mock.patch("os.path.basename") - @mock.patch("os.path.dirname") + @mock.patch("pathlib.Path.name") + @mock.patch("pathlib.Path.parent") def test_symlink_singularity_images( self, tmp_path, @@ -590,63 +1058,82 @@ def test_symlink_singularity_images( mock_basename, mock_close, mock_open, + mock_os_symlink, mock_symlink, mock_makedirs, + mock_prompt_singularity_cachedir_creation, + mock_check_and_set_implementation, ): # Setup - mock_dirname.return_value = f"{tmp_path}/path/to" - mock_basename.return_value = "singularity-image.img" - mock_open.return_value = 12 # file descriptor - mock_close.return_value = 12 # file descriptor - - download_obj = DownloadWorkflow( - pipeline="dummy", - outdir=tmp_path, - container_library=( + tmp_path = Path(tmp_path) + with ( + mock.patch.object(Path, "name", new_callable=mock.PropertyMock) as mock_basename, + mock.patch.object(Path, "parent", new_callable=mock.PropertyMock) as mock_dirname, + ): + mock_dirname.return_value = tmp_path / "path/to" + mock_basename.return_value = "singularity-image.img" + mock_open.return_value = 12 # file descriptor + mock_close.return_value = 12 # file descriptor + mock_prompt_singularity_cachedir_creation.return_value = False + + registries = [ "quay.io", "community-cr-prod.seqera.io/docker/registry/v2", "depot.galaxyproject.org/singularity", - ), - ) + ] + fetcher = SingularityFetcher( + outdir=tmp_path, + container_library=[], + registry_set=registries, + 
container_cache_utilisation="none", + container_cache_index=None, + ) - # Call the method - download_obj.symlink_singularity_images(f"{tmp_path}/path/to/singularity-image.img") + fetcher.symlink_registries(tmp_path / "path/to/singularity-image.img") - # Check that os.makedirs was called with the correct arguments - mock_makedirs.assert_any_call(f"{tmp_path}/path/to", exist_ok=True) + # Check that os.makedirs was called with the correct arguments + mock_makedirs.assert_any_call(exist_ok=True) - # Check that os.open was called with the correct arguments - mock_open.assert_any_call(f"{tmp_path}/path/to", os.O_RDONLY) + # Check that os.open was called with the correct arguments + mock_open.assert_any_call(tmp_path / "path/to", os.O_RDONLY) - # Check that os.symlink was called with the correct arguments - expected_calls = [ - mock.call( - "./singularity-image.img", - "./quay.io-singularity-image.img", - dir_fd=12, - ), - mock.call( - "./singularity-image.img", - "./community-cr-prod.seqera.io-docker-registry-v2-singularity-image.img", - dir_fd=12, - ), - mock.call( - "./singularity-image.img", - "./depot.galaxyproject.org-singularity-singularity-image.img", - dir_fd=12, - ), - ] - mock_symlink.assert_has_calls(expected_calls, any_order=True) + # Check that os.symlink was called with the correct arguments + expected_calls = [ + mock.call( + Path("./singularity-image.img"), + Path("./quay.io-singularity-image.img"), + dir_fd=12, + ), + mock.call( + Path("./singularity-image.img"), + Path("./community-cr-prod.seqera.io-docker-registry-v2-singularity-image.img"), + dir_fd=12, + ), + mock.call( + Path("./singularity-image.img"), + Path("./depot.galaxyproject.org-singularity-singularity-image.img"), + dir_fd=12, + ), + ] + mock_os_symlink.assert_has_calls(expected_calls, any_order=True) + # File name with registry in it @with_temporary_folder - @mock.patch("os.makedirs") + @mock.patch( + "nf_core.pipelines.download.singularity.SingularityFetcher.check_and_set_implementation" + ) # This is to make sure that we do not check for Singularity/Apptainer installation + @mock.patch( + "nf_core.pipelines.download.singularity.SingularityFetcher.prompt_singularity_cachedir_creation" + ) # This is to make sure that we do not prompt for a Singularity cachedir + @mock.patch("pathlib.Path.mkdir") + @mock.patch("pathlib.Path.symlink_to") @mock.patch("os.symlink") @mock.patch("os.open") @mock.patch("os.close") @mock.patch("re.sub") - @mock.patch("os.path.basename") - @mock.patch("os.path.dirname") - def test_symlink_singularity_images_registry( + @mock.patch("pathlib.Path.name") + @mock.patch("pathlib.Path.parent") + def test_symlink_singularity_symlink_registries( self, tmp_path, mock_dirname, @@ -654,53 +1141,79 @@ def test_symlink_singularity_images_registry( mock_resub, mock_close, mock_open, + mock_os_symlink, mock_symlink, mock_makedirs, + mock_prompt_singularity_cachedir_creation, + mock_check_and_set_implementation, ): + tmp_path = Path(tmp_path) # Setup - mock_resub.return_value = "singularity-image.img" - mock_dirname.return_value = f"{tmp_path}/path/to" - mock_basename.return_value = "quay.io-singularity-image.img" - mock_open.return_value = 12 # file descriptor - mock_close.return_value = 12 # file descriptor - - download_obj = DownloadWorkflow( - pipeline="dummy", - outdir=tmp_path, - container_library=("quay.io", "community-cr-prod.seqera.io/docker/registry/v2"), - ) - - download_obj.registry_set = {"quay.io", "community-cr-prod.seqera.io/docker/registry/v2"} - - # Call the method with registry - should 
not happen, but preserve it then. - download_obj.symlink_singularity_images(f"{tmp_path}/path/to/quay.io-singularity-image.img") - print(mock_resub.call_args) + with ( + mock.patch.object(Path, "name", new_callable=mock.PropertyMock) as mock_basename, + mock.patch.object(Path, "parent", new_callable=mock.PropertyMock) as mock_dirname, + ): + mock_resub.return_value = "singularity-image.img" + mock_dirname.return_value = tmp_path / "path/to" + mock_basename.return_value = "quay.io-singularity-image.img" + mock_open.return_value = 12 # file descriptor + mock_close.return_value = 12 # file descriptor + mock_prompt_singularity_cachedir_creation.return_value = False + + # Call the method with registry name included - should not happen, but preserve it then. + + registries = [ + "quay.io", # Same as in the filename + "community-cr-prod.seqera.io/docker/registry/v2", + ] + fetcher = SingularityFetcher( + outdir=tmp_path, + container_library=[], + registry_set=registries, + container_cache_utilisation="none", + container_cache_index=None, + ) + fetcher.symlink_registries(tmp_path / "path/to/quay.io-singularity-image.img") + + # Check that os.makedirs was called with the correct arguments + mock_makedirs.assert_called_once_with(exist_ok=True) + + # Check that os.symlink was called with the correct arguments + # assert_called_once_with also tells us that there was no attempt to + # - symlink to itself + # - symlink to the same registry + mock_os_symlink.assert_called_once_with( + Path("./quay.io-singularity-image.img"), + Path( + "./community-cr-prod.seqera.io-docker-registry-v2-singularity-image.img" + ), # "quay.io-" has been trimmed + dir_fd=12, + ) - # Check that os.makedirs was called with the correct arguments - mock_makedirs.assert_any_call(f"{tmp_path}/path/to", exist_ok=True) + # Normally it would be called for each registry, but since quay.io is part of the name, it + # will only be called once, as no symlink to itself must be created. + mock_open.assert_called_once_with(tmp_path / "path/to", os.O_RDONLY) - # Check that os.symlink was called with the correct arguments - mock_symlink.assert_called_with( - "./quay.io-singularity-image.img", - "./community-cr-prod.seqera.io-docker-registry-v2-singularity-image.img", - dir_fd=12, - ) - # Check that there is no attempt to symlink to itself (test parameters would result in that behavior if not checked in the function) - assert ( - unittest.mock.call("./quay.io-singularity-image.img", "./quay.io-singularity-image.img", dir_fd=12) - not in mock_symlink.call_args_list + # + # Test for DockerFetcher.write_docker_load_command + # + def test_docker_write_docker_load_message(self): + docker_fetcher = DockerFetcher( + outdir=Path("dummydir"), + container_library=[], + registry_set=[], ) + with redirect_stderr(StringIO()) as f: + docker_fetcher.write_docker_load_message() + assert "ls -1 *.tar | xargs --no-run-if-empty -L 1 docker load -i" in f.getvalue() + assert "dummydir/docker-images" in f.getvalue() - # Normally it would be called for each registry, but since quay.io is part of the name, it - # will only be called once, as no symlink to itself must be created. 
- mock_open.assert_called_once_with(f"{tmp_path}/path/to", os.O_RDONLY) - - # # Test for gather_registries' # @with_temporary_folder @mock.patch("nf_core.utils.fetch_wf_config") def test_gather_registries(self, tmp_path, mock_fetch_wf_config): + tmp_path = Path(tmp_path) download_obj = DownloadWorkflow( pipeline="dummy", outdir=tmp_path, @@ -737,125 +1250,133 @@ def test_gather_registries(self, tmp_path, mock_fetch_wf_config): # If Singularity is not installed, it raises a OSError because the singularity command can't be found. # @pytest.mark.skipif( - shutil.which("singularity") is not None, + shutil.which("singularity") is not None or shutil.which("apptainer") is not None, reason="Can't test how the code behaves when singularity is not installed if it is.", ) @with_temporary_folder @mock.patch("rich.progress.Progress.add_task") - def test_singularity_pull_image_singularity_not_installed(self, tmp_dir, mock_rich_progress): - download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_dir) + @mock.patch( + "nf_core.pipelines.download.singularity.SingularityFetcher.prompt_singularity_cachedir_creation" + ) # This is to make sure that we do not prompt for a Singularity cachedir + def test_singularity_pull_image_singularity_not_installed(self, tmp_dir, mock_rich_progress, mock_cachedir_prompt): + tmp_dir = Path(tmp_dir) + fetcher = SingularityFetcher( + outdir=tmp_dir, + container_library=[], + registry_set=[], + container_cache_utilisation="none", + container_cache_index=None, + ) with pytest.raises(OSError): - download_obj.singularity_pull_image( - "a-container", f"{tmp_dir}/anothercontainer.sif", None, "quay.io", mock_rich_progress - ) + fetcher.check_and_set_implementation() # - # Test for 'singularity_image_filenames' function + # Test for 'singularity.get_container_filename' function # - @with_temporary_folder - def test_singularity_image_filenames(self, tmp_path): - os.environ["NXF_SINGULARITY_CACHEDIR"] = f"{tmp_path}/cachedir" - download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_path) - download_obj.outdir = tmp_path - download_obj.container_cache_utilisation = "amend" - - download_obj.registry_set = { + @mock.patch("nf_core.pipelines.download.singularity.SingularityFetcher.check_and_set_implementation") + @mock.patch( + "nf_core.pipelines.download.singularity.SingularityFetcher.prompt_singularity_cachedir_creation" + ) # This is to make sure that we do not prompt for a Singularity cachedir + def test_singularity_get_container_filename(self, mock_cachedir_prompt, mock_check_and_set_implementation): + registries = [ "docker.io", "quay.io", "depot.galaxyproject.org/singularity", "community.wave.seqera.io/library", "community-cr-prod.seqera.io/docker/registry/v2", - } - - ## Test phase I: Container not yet cached, should be amended to cache - # out_path: str, Path to cache - # cache_path: None + ] - result = download_obj.singularity_image_filenames( - "https://depot.galaxyproject.org/singularity/bbmap:38.93--he522d1c_0" + fetcher = SingularityFetcher( + outdir=Path("test_singularity_get_container_filename"), + container_library=[], + registry_set=registries, + container_cache_utilisation="none", + container_cache_index=None, ) - - # Assert that the result is a tuple of length 2 - self.assertIsInstance(result, tuple) - self.assertEqual(len(result), 2) - - # Assert that the types of the elements are (str, None) - self.assertTrue(all((isinstance(element, str), element is None) for element in result)) - - # assert that the correct out_path is returned that points to the cache - 
assert result[0].endswith("/cachedir/bbmap-38.93--he522d1c_0.img") - - ## Test phase II: Test various container names - # out_path: str, Path to cache - # cache_path: None + # Test --- galaxy URL # + result = fetcher.get_container_filename( + "https://depot.galaxyproject.org/singularity/bbmap:38.93--he522d1c_0", + ) + assert result == "bbmap-38.93--he522d1c_0.img" # Test --- mulled containers # - result = download_obj.singularity_image_filenames( - "quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0" + result = fetcher.get_container_filename( + "quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0", ) - assert result[0].endswith( - "/cachedir/biocontainers-mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2-59cdd445419f14abac76b31dd0d71217994cbcc9-0.img" + assert ( + result + == "biocontainers-mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2-59cdd445419f14abac76b31dd0d71217994cbcc9-0.img" ) # Test --- Docker containers without registry # - result = download_obj.singularity_image_filenames("nf-core/ubuntu:20.04") - assert result[0].endswith("/cachedir/nf-core-ubuntu-20.04.img") + result = fetcher.get_container_filename("nf-core/ubuntu:20.04") + assert result == "nf-core-ubuntu-20.04.img" # Test --- Docker container with explicit registry -> should be trimmed # - result = download_obj.singularity_image_filenames("docker.io/nf-core/ubuntu:20.04") - assert result[0].endswith("/cachedir/nf-core-ubuntu-20.04.img") + result = fetcher.get_container_filename("docker.io/nf-core/ubuntu:20.04") + assert result == "nf-core-ubuntu-20.04.img" - # Test --- Docker container with explicit registry not in registry set -> can't be trimmed - result = download_obj.singularity_image_filenames("mirage-the-imaginative-registry.io/nf-core/ubuntu:20.04") - assert result[0].endswith("/cachedir/mirage-the-imaginative-registry.io-nf-core-ubuntu-20.04.img") + # Test --- Docker container with explicit registry not in registry list -> can't be trimmed + result = fetcher.get_container_filename("mirage-the-imaginative-registry.io/nf-core/ubuntu:20.04") + assert result == "mirage-the-imaginative-registry.io-nf-core-ubuntu-20.04.img" # Test --- Seqera Docker containers: Trimmed, because it is hard-coded in the registry set. - result = download_obj.singularity_image_filenames( - "community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264" - ) - assert result[0].endswith("/cachedir/coreutils-9.5--ae99c88a9b28c264.img") + result = fetcher.get_container_filename("community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264") + assert result == "coreutils-9.5--ae99c88a9b28c264.img" # Test --- Seqera Singularity containers: Trimmed, because it is hard-coded in the registry set. - result = download_obj.singularity_image_filenames( - "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data" + result = fetcher.get_container_filename( + "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data", ) - assert result[0].endswith( - "cachedir/blobs-sha256-c2-c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975-data.img" + assert result == "blobs-sha256-c2-c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975-data.img" + + # Test --- Seqera Oras containers: Trimmed, because it is hard-coded in the registry set. 
+ result = fetcher.get_container_filename( + "oras://community.wave.seqera.io/library/umi-transfer:1.0.0--e5b0c1a65b8173b6", ) + assert result == "umi-transfer-1.0.0--e5b0c1a65b8173b6.img" - ## Test phase III: Container will be cached but also copied to out_path - # out_path: str, Path to cache - # cache_path: str, Path to cache - download_obj.container_cache_utilisation = "copy" - result = download_obj.singularity_image_filenames( - "https://depot.galaxyproject.org/singularity/bbmap:38.93--he522d1c_0" + # Test --- SIF Singularity container with explicit registry -> should be trimmed # + result = fetcher.get_container_filename( + "docker.io-hashicorp-vault-1.16-sha256:e139ff28c23e1f22a6e325696318141259b177097d8e238a3a4c5b84862fadd8.sif", + ) + assert ( + result == "hashicorp-vault-1.16-sha256-e139ff28c23e1f22a6e325696318141259b177097d8e238a3a4c5b84862fadd8.sif" ) - self.assertTrue(all(isinstance(element, str) for element in result)) - assert result[0].endswith("/singularity-images/bbmap-38.93--he522d1c_0.img") - assert result[1].endswith("/cachedir/bbmap-38.93--he522d1c_0.img") + # Test --- SIF Singularity container without registry # + result = fetcher.get_container_filename( + "singularity-hpc/shpc/tests/testdata/salad_latest.sif", + ) + assert result == "singularity-hpc-shpc-tests-testdata-salad_latest.sif" - ## Test phase IV: Expect an error if no NXF_SINGULARITY_CACHEDIR is defined - os.environ["NXF_SINGULARITY_CACHEDIR"] = "" - with self.assertRaises(FileNotFoundError): - download_obj.singularity_image_filenames( - "https://depot.galaxyproject.org/singularity/bbmap:38.93--he522d1c_0" - ) + # Test --- Singularity container from a Singularity registry (and version tag) # + result = fetcher.get_container_filename( + "library://pditommaso/foo/bar.sif:latest", + ) + assert result == "pditommaso-foo-bar-latest.sif" + + # Test --- galaxy URL but no registry given # + fetcher.registry_set = [] + result = fetcher.get_container_filename("https://depot.galaxyproject.org/singularity/bbmap:38.93--he522d1c_0") + assert result == "depot.galaxyproject.org-singularity-bbmap-38.93--he522d1c_0.img" # # Test for '--singularity-cache remote --singularity-cache-index'. Provide a list of containers already available in a remote location. # @with_temporary_folder def test_remote_container_functionality(self, tmp_dir): - os.environ["NXF_SINGULARITY_CACHEDIR"] = "foo" + tmp_dir = Path(tmp_dir) + os.environ["NXF_SINGULARITY_CACHEDIR"] = str(tmp_dir / "foo") download_obj = DownloadWorkflow( pipeline="nf-core/rnaseq", - outdir=os.path.join(tmp_dir, "new"), + outdir=(tmp_dir / "new"), revision="3.9", compress_type="none", - container_cache_index=str(Path(TEST_DATA_DIR, "testdata_remote_containers.txt")), + container_cache_index=Path(TEST_DATA_DIR, "testdata_remote_containers.txt"), + container_system="singularity", ) download_obj.include_configs = False # suppress prompt, because stderr.is_interactive doesn't. 
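
The filename assertions in `test_singularity_get_container_filename` above all follow the same pattern: drop the URL scheme, trim a known registry prefix, and flatten the remaining path into a filesystem-safe name. The snippet below is a minimal, hypothetical sketch of that logic (helper name invented here); it deliberately skips the `library://` and pre-existing `.sif` special cases exercised at the end of the test.

import re

def container_filename_sketch(container: str, registry_set: list[str]) -> str:
    """Derive a flat image filename from a container address (simplified sketch)."""
    name = re.sub(r"^(?:https?|oras)://", "", container)  # drop URL scheme, if any
    for registry in sorted(registry_set, key=len, reverse=True):  # longest match first
        if name.startswith(registry + "/"):
            name = name[len(registry) + 1 :]  # trim the known registry prefix
            break
    name = name.replace("/", "-").replace(":", "-")  # make it filesystem-safe
    return name if name.endswith(".sif") else name + ".img"

assert (
    container_filename_sketch(
        "https://depot.galaxyproject.org/singularity/bbmap:38.93--he522d1c_0",
        ["depot.galaxyproject.org/singularity"],
    )
    == "bbmap-38.93--he522d1c_0.img"
)
assert container_filename_sketch("docker.io/nf-core/ubuntu:20.04", ["docker.io"]) == "nf-core-ubuntu-20.04.img"
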
@@ -864,27 +1385,36 @@ def test_remote_container_functionality(self, tmp_dir): assert download_obj.container_cache_utilisation == "remote" and download_obj.container_system == "singularity" assert isinstance(download_obj.containers_remote, list) and len(download_obj.containers_remote) == 0 # read in the file - download_obj.read_remote_containers() - assert len(download_obj.containers_remote) == 33 - assert "depot.galaxyproject.org-singularity-salmon-1.5.2--h84f40af_0.img" in download_obj.containers_remote - assert "MV Rena" not in download_obj.containers_remote # decoy in test file + containers_remote = SingularityFetcher.read_remote_singularity_containers(download_obj.container_cache_index) + assert len(containers_remote) == 33 + assert "depot.galaxyproject.org-singularity-salmon-1.5.2--h84f40af_0.img" in containers_remote + assert "MV Rena" not in containers_remote # decoy in test file # # Tests for the main entry method 'download_workflow' # + + # We do not want to download all containers, so we mock the download by just touching the singularity files + def mock_download_file(self, remote_path: str, output_path: str): + Path(output_path).touch() # Create an empty file at the output path + @with_temporary_folder - @mock.patch("nf_core.pipelines.download.DownloadWorkflow.singularity_pull_image") - @mock.patch("shutil.which") - def test_download_workflow_with_success(self, tmp_dir, mock_download_image, mock_singularity_installed): - os.environ["NXF_SINGULARITY_CACHEDIR"] = "foo" + @mock.patch( + "nf_core.pipelines.download.singularity.SingularityFetcher.check_and_set_implementation" + ) # This is to make sure that we do not check for Singularity/Apptainer installation + @mock.patch.object(nf_core.pipelines.download.singularity.FileDownloader, "download_file", new=mock_download_file) + def test_download_workflow_with_success(self, tmp_dir, mock_check_and_set_implementation): + tmp_dir = Path(tmp_dir) + os.environ["NXF_SINGULARITY_CACHEDIR"] = str(tmp_dir / "foo") download_obj = DownloadWorkflow( - pipeline="nf-core/methylseq", - outdir=os.path.join(tmp_dir, "new"), + pipeline="nf-core/bamtofastq", + outdir=tmp_dir / "new", container_system="singularity", - revision="1.6", + revision="2.2.0", compress_type="none", container_cache_utilisation="copy", + parallel=1, ) download_obj.include_configs = True # suppress prompt, because stderr.is_interactive doesn't. 
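
For the remote-cache test above, the index file is plain text and the fixture deliberately contains a decoy line ("MV Rena"); only entries that look like Singularity image filenames should survive the parse. A rough sketch of that filtering is shown below, assuming the real `read_remote_singularity_containers` performs stricter validation than this.

from pathlib import Path

def read_remote_index_sketch(index_file: Path) -> list[str]:
    """Collect image filenames from a remote-cache index, ignoring everything else (sketch)."""
    containers = []
    for line in index_file.read_text().splitlines():
        entry = line.strip()
        if entry.endswith((".img", ".sif")):  # skips comments, blank lines, and decoy entries
            containers.append(entry)
    return containers
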
@@ -894,11 +1424,20 @@ def test_download_workflow_with_success(self, tmp_dir, mock_download_image, mock
     # Test Download for Seqera Platform
     #
     @with_temporary_folder
-    @mock.patch("nf_core.pipelines.download.DownloadWorkflow.get_singularity_images")
-    def test_download_workflow_for_platform(self, tmp_dir, _):
+    @mock.patch(
+        "nf_core.pipelines.download.singularity.SingularityFetcher.check_and_set_implementation"
+    )  # This is to make sure that we do not check for Singularity/Apptainer installation
+    @mock.patch("nf_core.pipelines.download.singularity.SingularityFetcher.fetch_containers")
+    def test_download_workflow_for_platform(
+        self,
+        tmp_dir,
+        mock_fetch_containers,
+        mock_check_and_set_implementation,
+    ):
+        tmp_dir = Path(tmp_dir)
         download_obj = DownloadWorkflow(
             pipeline="nf-core/rnaseq",
-            revision=("3.7", "3.9"),
+            revision=("3.19.0", "3.17.0"),
             compress_type="none",
             platform=True,
             container_system="singularity",
@@ -925,9 +1464,10 @@ def test_download_workflow_for_platform(self, tmp_dir, _):
         assert isinstance(download_obj.wf_download_url, dict) and len(download_obj.wf_download_url) == 0

         # The outdir for multiple revisions is the pipeline name and date: e.g. nf-core-rnaseq_2023-04-27_18-54
-        assert bool(re.search(r"nf-core-rnaseq_\d{4}-\d{2}-\d{1,2}_\d{1,2}-\d{1,2}", download_obj.outdir, re.S))
+        assert isinstance(download_obj.outdir, Path)
+        assert bool(re.search(r"nf-core-rnaseq_\d{4}-\d{2}-\d{1,2}_\d{1,2}-\d{1,2}", str(download_obj.outdir), re.S))

-        download_obj.output_filename = f"{download_obj.outdir}.git"
+        download_obj.output_filename = download_obj.outdir.with_suffix(".git")
         download_obj.download_workflow_platform(location=tmp_dir)

         assert download_obj.workflow_repo
@@ -942,15 +1482,12 @@ def test_download_workflow_for_platform(self, tmp_dir, _):
         # assert that the download has a "latest" branch.
         assert "latest" in all_heads

-        # download_obj.download_workflow_platform(location=tmp_dir) will run container image detection for all requested revisions
-        assert isinstance(download_obj.containers, list) and len(download_obj.containers) == 33
+        # download_obj.download_workflow_platform(location=tmp_dir) will run `nextflow inspect` for each revision
+        # This means that the containers in download_obj.containers are those of the last specified revision, i.e. 3.17.0
+        assert isinstance(download_obj.containers, list) and len(download_obj.containers) == 39
         assert (
-            "https://depot.galaxyproject.org/singularity/bbmap:38.93--he522d1c_0" in download_obj.containers
+            "https://depot.galaxyproject.org/singularity/bbmap:39.10--h92535d8_0" in download_obj.containers
         )  # direct definition
-        assert (
-            "https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0"
-            in download_obj.containers
-        )  # indirect definition via $container variable.
# clean-up # remove "nf-core-rnaseq*" directories @@ -960,9 +1497,10 @@ def test_download_workflow_for_platform(self, tmp_dir, _): # # Brief test adding a single custom tag to Seqera Platform download # - @mock.patch("nf_core.pipelines.download.DownloadWorkflow.get_singularity_images") + @mock.patch("nf_core.pipelines.download.singularity.SingularityFetcher.fetch_containers") @with_temporary_folder def test_download_workflow_for_platform_with_one_custom_tag(self, _, tmp_dir): + tmp_dir = Path(tmp_dir) download_obj = DownloadWorkflow( pipeline="nf-core/rnaseq", revision=("3.9"), @@ -981,9 +1519,10 @@ def test_download_workflow_for_platform_with_one_custom_tag(self, _, tmp_dir): # # Test adding custom tags to Seqera Platform download (full test) # - @mock.patch("nf_core.pipelines.download.DownloadWorkflow.get_singularity_images") + @mock.patch("nf_core.pipelines.download.singularity.SingularityFetcher.fetch_containers") @with_temporary_folder def test_download_workflow_for_platform_with_custom_tags(self, _, tmp_dir): + tmp_dir = Path(tmp_dir) with self._caplog.at_level(logging.INFO): from git.refs.tag import TagReference diff --git a/tests/test_cli.py b/tests/test_cli.py index 6f4b7f894c..8531c2ff0d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -161,6 +161,7 @@ def test_cli_launch_fail(self, mock_launcher): @mock.patch("nf_core.pipelines.download.DownloadWorkflow") def test_cli_download(self, mock_dl): """Test nf-core pipeline is downloaded and cli parameters are passed on.""" + toplevel_params = {"hide-progress": None} params = { "revision": "abcdef", "outdir": "/path/outdir", @@ -176,7 +177,12 @@ def test_cli_download(self, mock_dl): "parallel-downloads": 2, } - cmd = ["pipelines", "download"] + self.assemble_params(params) + ["pipeline_name"] + cmd = ( + self.assemble_params(toplevel_params) + + ["pipelines", "download"] + + self.assemble_params(params) + + ["pipeline_name"] + ) result = self.invoke_cli(cmd) assert result.exit_code == 0 @@ -195,6 +201,7 @@ def test_cli_download(self, mock_dl): params["container-cache-utilisation"], params["container-cache-index"], params["parallel-downloads"], + "hide-progress" in toplevel_params, ) mock_dl.return_value.download_workflow.assert_called_once()
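
The reworked `test_cli_download` above splits the parameters into top-level options, which must precede the `pipelines download` subcommand, and download options, which follow it. `assemble_params` is a helper on the test class that is not shown in this diff; the sketch below is a hypothetical stand-in used only to illustrate how a dict such as `{"hide-progress": None}` becomes flag-style arguments, with `None` marking a bare flag.

def assemble_params_sketch(params: dict) -> list[str]:
    """Turn a params dict into CLI arguments (illustrative stand-in, not the real helper)."""
    args: list[str] = []
    for key, value in params.items():
        args.append(f"--{key}")
        if value is not None:  # None marks a bare flag with no value
            args.append(str(value))
    return args

cmd = (
    assemble_params_sketch({"hide-progress": None})  # global option: goes before the subcommand
    + ["pipelines", "download"]
    + assemble_params_sketch({"revision": "abcdef", "parallel-downloads": 2})
    + ["pipeline_name"]
)
# cmd == ["--hide-progress", "pipelines", "download", "--revision", "abcdef",
#         "--parallel-downloads", "2", "pipeline_name"]
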