diff --git a/pyani/scripts/subcommands/subcmd_download.py b/pyani/scripts/subcommands/subcmd_download.py index ac991bed..98185151 100644 --- a/pyani/scripts/subcommands/subcmd_download.py +++ b/pyani/scripts/subcommands/subcmd_download.py @@ -40,17 +40,23 @@ """Provides the download subcommand for pyani.""" import logging +import subprocess from argparse import Namespace from typing import Dict, List, NamedTuple, Optional, Tuple from Bio import SeqIO -from pyani import download +from pyani import download, PyaniException from pyani.pyani_tools import termcolor from pyani.scripts import make_outdir +class PyaniDownloadException(PyaniException): + + """Exception raised when a download or archive extraction fails.""" + + class Skipped(NamedTuple): """Convenience struct for holding information about skipped genomes.""" @@ -98,7 +104,9 @@ def dl_info_to_str(esummary, uid_class) -> str: def download_data( - args: Namespace, api_key: Optional[str], asm_dict: Dict[str, List], + args: Namespace, + api_key: Optional[str], + asm_dict: Dict[str, List], ) -> Tuple[List, List, List]: """Download the accessions indicated in the passed dictionary. @@ -131,7 +139,14 @@ def download_data( exc_info=True, ) skippedlist.append( - Skipped(tid, uid, "", "", None, "RefSeq",) + Skipped( + tid, + uid, + "", + "", + None, + "RefSeq", + ) ) # pylint: disable=no-member continue @@ -154,7 +169,13 @@ def download_data( ) skippedlist.extend(skipped_genomes) if not dlstatus.skipped: - extract_genomes(args, dlstatus, esummary) + try: + extract_genomes(args, dlstatus, esummary) + except PyaniDownloadException: + logger.warning( + "Could not extract %s; continuing", dlstatus.outfname + ) + continue labeltxt, classtxt = hash_genomes(args, dlstatus, filestem, uid_class) classes.append(classtxt) labels.append(labeltxt) @@ -182,7 +203,10 @@ def extract_genomes(args: Namespace, dlstatus: download.DLStatus, esummary) -> N logger.warning("Output file %s exists, not extracting", ename) else: logger.debug("Extracting archive %s to %s", dlstatus.outfname, ename) - download.extract_contigs(dlstatus.outfname, ename) + try: + download.extract_contigs(dlstatus.outfname, ename) + except subprocess.CalledProcessError: + raise PyaniDownloadException # Modify sequence ID header if Kraken option active if args.kraken: diff --git a/tests/conftest.py b/tests/conftest.py index 4b88f323..bea937ff 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -130,6 +130,12 @@ def dir_anim_in(): return FIXTUREPATH / "anim" +@pytest.fixture +def dir_download_out(): + """Output directory for download tests.""" + return FIXTUREPATH / "download" + + @pytest.fixture def dir_fastani_in(): """Input files for fastANI tests.""" diff --git a/tests/fixtures/download/bad_location.txt b/tests/fixtures/download/bad_location.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_subcmd_01_download.py b/tests/test_subcmd_01_download.py index 095494ad..d141da9e 100644 --- a/tests/test_subcmd_01_download.py +++ b/tests/test_subcmd_01_download.py @@ -58,6 +58,7 @@ """ import logging +import subprocess from argparse import Namespace from pathlib import Path @@ -123,6 +124,12 @@ def test_create_hash(): download.create_hash(test_file) +def test_failed_extract_contigs(dir_download_out): + """Test for failed extraction of zip file contents.""" + with assertions.assertRaises(subprocess.CalledProcessError): + download.extract_contigs("bad/file.gz", dir_download_out / "bad_location.txt") + + def test_download_dry_run(dryrun_namespace): """Dry run of C. blochmannia download.""" subcommands.subcmd_download(dryrun_namespace)