Skip to content

Commit

Permalink
Deprecate unused code (#654)
Browse files: browse the repository at this point in the history
* Cleanup bed

* Cleanup gff

* use all

* Add testcases to autoscale

* division fix

* Deprecate some assembly code

* sort complex files
  • Loading branch information
tanghaibao authored Apr 30, 2024
1 parent 31bf72b commit cc08a2c
Show file tree
Hide file tree
Showing 10 changed files with 331 additions and 1,704 deletions.
1,216 changes: 0 additions & 1,216 deletions jcvi/assembly/ca.py

This file was deleted.

93 changes: 14 additions & 79 deletions jcvi/assembly/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,28 @@
import os
import os.path as op
import sys
import logging

from jcvi.formats.base import BaseFile, write_file, must_open
from jcvi.formats.fastq import guessoffset
from jcvi.utils.cbook import depends, human_size
from jcvi.apps.base import (
from ..apps.base import (
OptionParser,
ActionDispatcher,
cleanup,
datadir,
download,
sh,
logger,
mkdir,
need_update,
datadir,
sh,
)
from ..formats.base import BaseFile, must_open, write_file
from ..formats.fastq import guessoffset
from ..utils.cbook import depends, human_size


class FastQCdata(BaseFile, dict):
def __init__(self, filename, human=False):
super(FastQCdata, self).__init__(filename)
if not op.exists(filename):
logging.debug("File `{0}` not found.".format(filename))
logger.debug("File `%s` not found.", filename)
# Sample_RF37-1/RF37-1_GATCAG_L008_R2_fastqc =>
# RF37-1_GATCAG_L008_R2
self["Filename"] = op.basename(op.split(filename)[0]).rsplit("_", 1)[0]
Expand Down Expand Up @@ -66,14 +66,13 @@ def __init__(self, filename, human=False):
def main():
    """
    Entry point: hand the command-line sub-command over to ActionDispatcher.

    The `actions` table pairs each sub-command name with its one-line help
    text. `globals()` is passed to `dispatch`, presumably so the dispatcher
    can look up the module-level function named after the chosen action
    (confirm against ActionDispatcher).
    """
    # Each action appears exactly once, in alphabetical order; the previous
    # table listed "count", "trim", "correct" and "hetsmooth" twice.
    actions = (
        ("alignextend", "increase read length by extending based on alignments"),
        ("contamination", "check reads contamination against Ecoli"),
        ("correct", "correct reads using ALLPATHS-LG"),
        ("count", "count reads based on FASTQC results"),
        ("diginorm", "run K-mer based normalization"),
        ("expand", "expand sequences using short reads"),
        ("hetsmooth", "reduce K-mer diversity using het-smooth"),
        ("trim", "trim reads using TRIMMOMATIC"),
    )
    p = ActionDispatcher(actions)
    p.dispatch(globals())
Expand Down Expand Up @@ -205,7 +204,7 @@ def expand(args):
)

samfile, mapped, _ = get_samfile(reads, bes, bowtie=True, mapped=True)
logging.debug("Extract first {0} reads from `{1}`.".format(nreads, mapped))
logger.debug("Extract first %d reads from `%s`.", nreads, mapped)

pf = mapped.split(".")[0]
pf = pf.split("-")[0]
Expand Down Expand Up @@ -252,9 +251,7 @@ def expand(args):
fw.close()

cleanup(samfile, logfile, mapped, reads, fastafile, qualfile, blastfile, pf)
logging.debug(
"Annotated seqs (n={0}) written to `{1}`.".format(len(recs), annotatedfasta)
)
logger.debug("Annotated seqs (n=%d) written to `%s`.", len(recs), annotatedfasta)

return annotatedfasta

Expand Down Expand Up @@ -302,68 +299,6 @@ def contamination(args):
fw.close()


def alignextend(args):
    """
    %prog alignextend ref.fasta read.1.fastq read.2.fastq
    Wrapper around AMOS alignextend.
    """
    # NOTE: the docstring above doubles as the usage text handed to
    # OptionParser, so it is kept verbatim; extra documentation lives in
    # comments instead.
    #
    # Builds the command line for AMOS `alignextend.pl` from the parsed
    # options and the three positional arguments, then runs it through `sh`.
    stages = "prepare,align,filter,rmdup,genreads".split(",")

    p = OptionParser(alignextend.__doc__)
    p.add_option(
        "--nosuffix",
        action="store_true",
        default=False,
        help="Do not add /1/2 suffix to the read",
    )
    p.add_option(
        "--rc",
        action="store_true",
        default=False,
        help="Reverse complement the reads before alignment",
    )
    p.add_option("--len", type="int", default=100, help="Extend to this length")
    p.add_option(
        "--stage", choices=stages, default="prepare", help="Start from certain stage"
    )
    p.add_option(
        "--dup",
        type="int",
        default=10,
        help="Filter duplicates with coordinates within this distance",
    )
    p.add_option(
        "--maxdiff", type="int", default=1, help="Maximum number of differences"
    )
    p.set_home("amos")
    p.set_cpus()
    opts, args = p.parse_args(args)

    # Exactly three positionals are required: reference plus the two read files.
    if len(args) != 3:
        sys.exit(not p.print_help())

    ref, r1, r2 = args
    # Prefix = basename of the first read file up to its first dot.
    pf = op.basename(r1).split(".")[0]

    # Collect the command as fragments (each carrying its own leading space)
    # and concatenate once at the end.
    fragments = [op.join(opts.amos_home, "src/Experimental/alignextend.pl")]
    if not opts.nosuffix:
        fragments.append(" -suffix")

    # Skip re-indexing when the BWA index file is already up to date w.r.t. ref.
    bwa_idx = "{0}.ref.fa.sa".format(pf)
    if not need_update(ref, bwa_idx):
        fragments.append(" -noindex")

    fragments.append(" -threads {0}".format(opts.cpus))

    # A guessed quality offset of 64 adds the `-I` switch.
    if guessoffset([r1]) == 64:
        fragments.append(" -I")
    if opts.rc:
        fragments.append(" -rc")

    fragments.append(" -allow -len {0} -dup {1}".format(opts.len, opts.dup))
    # Min/max bounds are fixed multiples (2x and 20x) of the target length.
    fragments.append(" -min {0} -max {1}".format(2 * opts.len, 20 * opts.len))
    fragments.append(" -maxdiff {0}".format(opts.maxdiff))
    fragments.append(" -stage {0}".format(opts.stage))
    # Leading "" yields the separating space before the positional arguments.
    fragments.append(" ".join(("", pf, ref, r1, r2)))

    sh("".join(fragments))


def count(args):
"""
%prog count *.gz
Expand Down
12 changes: 9 additions & 3 deletions jcvi/formats/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import sys

from collections import OrderedDict
from itertools import groupby, islice, cycle
from itertools import cycle, groupby, islice

from Bio import SeqIO
from ..apps.base import (
Expand Down Expand Up @@ -337,7 +337,13 @@ def timestamp():
return "{0}{1:02d}{2:02d}".format(dt.now().year, dt.now().month, dt.now().day)


def must_open(filename, mode="r", checkexists=False, skipcheck=False, oappend=False):
def must_open(
filename: str,
mode: str = "r",
checkexists: bool = False,
skipcheck: bool = False,
oappend: bool = False,
):
"""
Accepts filename and returns filehandle.
Expand Down Expand Up @@ -385,7 +391,7 @@ def must_open(filename, mode="r", checkexists=False, skipcheck=False, oappend=Fa

elif filename.endswith(".bz2"):
if "r" in mode:
cmd = "bzcat {0}".format(filename)
cmd = f"bzcat {filename}"
fp = popen(cmd, debug=False)
elif "w" in mode:
import bz2
Expand Down
Loading

0 comments on commit cc08a2c

Please sign in to comment.