Deprecate unused code (#654)

* Cleanup bed
* Cleanup gff
* use all
* Add testcases to autoscale
* division fix
* Deprecate some assembly code
* sort complex files
tanghaibao · Apr 30, 2024 · cc08a2c · cc08a2c
1 parent 31bf72b
commit cc08a2c
Show file tree

Hide file tree

Showing 10 changed files with 331 additions and 1,704 deletions.
diff --git a/jcvi/assembly/ca.py b/jcvi/assembly/ca.py
diff --git a/jcvi/assembly/preprocess.py b/jcvi/assembly/preprocess.py
@@ -7,28 +7,28 @@
 import os
 import os.path as op
 import sys
-import logging
 
-from jcvi.formats.base import BaseFile, write_file, must_open
-from jcvi.formats.fastq import guessoffset
-from jcvi.utils.cbook import depends, human_size
-from jcvi.apps.base import (
+from ..apps.base import (
     OptionParser,
     ActionDispatcher,
     cleanup,
+    datadir,
     download,
-    sh,
+    logger,
     mkdir,
     need_update,
-    datadir,
+    sh,
 )
+from ..formats.base import BaseFile, must_open, write_file
+from ..formats.fastq import guessoffset
+from ..utils.cbook import depends, human_size
 
 
 class FastQCdata(BaseFile, dict):
     def __init__(self, filename, human=False):
         super(FastQCdata, self).__init__(filename)
         if not op.exists(filename):
-            logging.debug("File `{0}` not found.".format(filename))
+            logger.debug("File `%s` not found.", filename)
             # Sample_RF37-1/RF37-1_GATCAG_L008_R2_fastqc =>
             # RF37-1_GATCAG_L008_R2
             self["Filename"] = op.basename(op.split(filename)[0]).rsplit("_", 1)[0]
@@ -66,14 +66,13 @@ def __init__(self, filename, human=False):
 def main():
 
     actions = (
-        ("count", "count reads based on FASTQC results"),
-        ("trim", "trim reads using TRIMMOMATIC"),
-        ("correct", "correct reads using ALLPATHS-LG"),
-        ("hetsmooth", "reduce K-mer diversity using het-smooth"),
-        ("alignextend", "increase read length by extending based on alignments"),
         ("contamination", "check reads contamination against Ecoli"),
+        ("correct", "correct reads using ALLPATHS-LG"),
+        ("count", "count reads based on FASTQC results"),
         ("diginorm", "run K-mer based normalization"),
         ("expand", "expand sequences using short reads"),
+        ("hetsmooth", "reduce K-mer diversity using het-smooth"),
+        ("trim", "trim reads using TRIMMOMATIC"),
     )
     p = ActionDispatcher(actions)
     p.dispatch(globals())
@@ -205,7 +204,7 @@ def expand(args):
     )
 
     samfile, mapped, _ = get_samfile(reads, bes, bowtie=True, mapped=True)
-    logging.debug("Extract first {0} reads from `{1}`.".format(nreads, mapped))
+    logger.debug("Extract first %d reads from `%s`.", nreads, mapped)
 
     pf = mapped.split(".")[0]
     pf = pf.split("-")[0]
@@ -252,9 +251,7 @@ def expand(args):
     fw.close()
 
     cleanup(samfile, logfile, mapped, reads, fastafile, qualfile, blastfile, pf)
-    logging.debug(
-        "Annotated seqs (n={0}) written to `{1}`.".format(len(recs), annotatedfasta)
-    )
+    logger.debug("Annotated seqs (n=%d) written to `%s`.", len(recs), annotatedfasta)
 
     return annotatedfasta
 
@@ -302,68 +299,6 @@ def contamination(args):
     fw.close()
 
 
-def alignextend(args):
-    """
-    %prog alignextend ref.fasta read.1.fastq read.2.fastq
-
-    Wrapper around AMOS alignextend.
-    """
-    choices = "prepare,align,filter,rmdup,genreads".split(",")
-    p = OptionParser(alignextend.__doc__)
-    p.add_option(
-        "--nosuffix",
-        default=False,
-        action="store_true",
-        help="Do not add /1/2 suffix to the read",
-    )
-    p.add_option(
-        "--rc",
-        default=False,
-        action="store_true",
-        help="Reverse complement the reads before alignment",
-    )
-    p.add_option("--len", default=100, type="int", help="Extend to this length")
-    p.add_option(
-        "--stage", default="prepare", choices=choices, help="Start from certain stage"
-    )
-    p.add_option(
-        "--dup",
-        default=10,
-        type="int",
-        help="Filter duplicates with coordinates within this distance",
-    )
-    p.add_option(
-        "--maxdiff", default=1, type="int", help="Maximum number of differences"
-    )
-    p.set_home("amos")
-    p.set_cpus()
-    opts, args = p.parse_args(args)
-
-    if len(args) != 3:
-        sys.exit(not p.print_help())
-
-    ref, r1, r2 = args
-    pf = op.basename(r1).split(".")[0]
-    cmd = op.join(opts.amos_home, "src/Experimental/alignextend.pl")
-    if not opts.nosuffix:
-        cmd += " -suffix"
-    bwa_idx = "{0}.ref.fa.sa".format(pf)
-    if not need_update(ref, bwa_idx):
-        cmd += " -noindex"
-    cmd += " -threads {0}".format(opts.cpus)
-    offset = guessoffset([r1])
-    if offset == 64:
-        cmd += " -I"
-    if opts.rc:
-        cmd += " -rc"
-    cmd += " -allow -len {0} -dup {1}".format(opts.len, opts.dup)
-    cmd += " -min {0} -max {1}".format(2 * opts.len, 20 * opts.len)
-    cmd += " -maxdiff {0}".format(opts.maxdiff)
-    cmd += " -stage {0}".format(opts.stage)
-    cmd += " ".join(("", pf, ref, r1, r2))
-    sh(cmd)
-
-
 def count(args):
     """
     %prog count *.gz

diff --git a/jcvi/formats/base.py b/jcvi/formats/base.py
@@ -7,7 +7,7 @@
 import sys
 
 from collections import OrderedDict
-from itertools import groupby, islice, cycle
+from itertools import cycle, groupby, islice
 
 from Bio import SeqIO
 from ..apps.base import (
@@ -337,7 +337,13 @@ def timestamp():
     return "{0}{1:02d}{2:02d}".format(dt.now().year, dt.now().month, dt.now().day)
 
 
-def must_open(filename, mode="r", checkexists=False, skipcheck=False, oappend=False):
+def must_open(
+    filename: str,
+    mode: str = "r",
+    checkexists: bool = False,
+    skipcheck: bool = False,
+    oappend: bool = False,
+):
     """
     Accepts filename and returns filehandle.
 
@@ -385,7 +391,7 @@ def must_open(filename, mode="r", checkexists=False, skipcheck=False, oappend=Fa
 
     elif filename.endswith(".bz2"):
         if "r" in mode:
-            cmd = "bzcat {0}".format(filename)
+            cmd = f"bzcat {filename}"
             fp = popen(cmd, debug=False)
         elif "w" in mode:
             import bz2