generated from snakemake-workflows/snakemake-workflow-template
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
101 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# This rule uses hifiasm to assemble the genome after filtering out organelles reads | ||
# Following the instructions for assembling heterozygous genomes from the hifiasm github | ||
|
||
rule run_hifiasm: | ||
input: | ||
organelles_filtered = "results/reads/hifi/filtered_orgs_reads.fastq.gz" | ||
output: | ||
gfa = "results/assemblies/hifiasm/hifiasm.asm.bp.p_ctg.gfa", | ||
fasta = "results/assemblies/hifiasm/hifiasm.asm.bp.p_ctg.fa" | ||
conda: | ||
"../envs/hifiasm.yaml" | ||
params: | ||
optional_params = " ".join(f'{k} {v}' for k, v in config['hifiasm']['optional_params'].items() if v) | ||
shell: | ||
""" | ||
hifiasm {input.organelles_filtered} -t {config[hifiasm][t]} -o results/assemblies/hifiasm/hifiasm.asm {params.optional_params} | ||
awk '/^S/{{print ">"$$2;print $$3}}' {output.gfa} > {output.fasta} | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# This rule uses minimap2 to align the fastq reads to the organelles fasta | ||
|
||
rule run_minimap2: | ||
input: | ||
mito = "results/oatk/oatk.asm.mito.ctg.fasta", | ||
pltd = "results/oatk/oatk.asm.pltd.ctg.fasta", | ||
fastq = "results/reads/hifi/hifi.fastq.gz" | ||
output: | ||
mito_sam = "results/minimap2/aln.mito.sam", | ||
pltd_sam = "results/minimap2/aln.pltd.sam", | ||
conda: | ||
"../envs/minimap2.yaml" | ||
shell: | ||
""" | ||
minimap2 -ax map-hifi {input.mito} -t {config[minimap2][t]} {input.fastq} > {output.mito_sam} | ||
minimap2 -ax map-hifi {input.pltd} -t {config[minimap2][t]} {input.fastq} > {output.pltd_sam} | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# This rule uses purge_dups purge haplotigs and overlaps in the assembly produced by hifiasm | ||
|
||
rule run_purge_dups: | ||
input: | ||
reads = "results/reads/hifi/filtered_orgs_reads.fastq.gz", | ||
fasta = "results/assemblies/hifiasm/hifiasm.asm.bp.p_ctg.fa" | ||
output: | ||
paf = "results/assemblies/purge_dups/hifi_vs_hifiasm_contigs.paf.gz", | ||
pcbstat_stat = "results/assemblies/purge_dups/PB.stat", | ||
pcbstat_cov = "results/assemblies/purge_dups/PB.base.cov", | ||
split_fa = "results/assemblies/purge_dups/hifiasm.asm.split", | ||
self_paf = "results/assemblies/purge_dups/hifiasm.split.self.paf.gz", | ||
dups_bed = "results/assemblies/purge_dups/dups.bed", | ||
log = "results/assemblies/purge_dups/purge_dups.log", | ||
purged_fasta = "results/assemblies/purge_dups/purged.fa", | ||
fasta_rename = "results/assemblies/purge_dups/hifiasm.asm.purged.fa" | ||
conda: | ||
"../envs/purge_dups.yaml" | ||
shell: | ||
""" | ||
minimap2 -xasm20 {input.fasta} {input.reads} -t {config[minimap2][t]} | gzip -c - > {output.paf} | ||
pbcstat {output.paf} | ||
mv PB.* results/assemblies/purge_dups/ | ||
calcutus {output.pcbstat_stat} > cutoffs 2>calcults.log | ||
split_fa {input.fasta} > {output.split_fa} | ||
minimap2 -xasm5 -DP {output.split_fa} {output.split_fa} -t {config[minimap2][t]} | gzip -c - > {output.self_paf} | ||
purge_dups -2 -T cutoffs -c {output.pcbstat_cov} {output.self_paf} > {output.dups_bed} 2> {output.log} | ||
get_seqs -e {output.dups_bed} {input.fasta} > {output.purged_fasta} | ||
mv {output.purged_fasta} {output.fasta_rename} | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# This rule uses samtools and shell commands to extract the name of the fastq reads aligning to organelles fasta for their subsequent removal | ||
|
||
rule run_samtools: | ||
input: | ||
mito = "results/minimap2/aln.mito.sam", | ||
pltd = "results/minimap2/aln.pltd.sam" | ||
output: | ||
mito_bam = "results/samtools/mito.sorted.bam", | ||
pltd_bam = "results/samtools/pltd.sorted.bam", | ||
mito_reads = "results/samtools/mapped.mito.reads.txt", | ||
pltd_reads = "results/samtools/mapped.pltd.reads.txt", | ||
organelles = "results/samtools/organelles.reads.txt" | ||
conda: | ||
"../envs/samtools.yaml" | ||
shell: | ||
""" | ||
samtools view -Sb {input.mito} | samtools sort -o {output.mito_bam} | ||
samtools view -Sb {input.pltd} | samtools sort -o {output.pltd_bam} | ||
samtools view -F 4 {output.mito_bam} | cut -f1 > {output.mito_reads} | ||
samtools view -F 4 {output.pltd_bam} | cut -f1 > {output.pltd_reads} | ||
cat {output.mito_reads} {output.pltd_reads} | sort | uniq > {output.organelles} | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# This rule uses seqkit to filter organelle reads from the initial fastq files | ||
|
||
rule run_seqkit: | ||
input: | ||
organelles = "results/samtools/organelles.reads.txt", | ||
fastq = "results/reads/hifi/hifi.fastq.gz" | ||
output: | ||
organelles_filtered = "results/reads/hifi/filtered_orgs_reads.fastq.gz" | ||
conda: | ||
"../envs/seqkit.yaml" | ||
shell: | ||
""" | ||
seqkit grep -f {input.organelles} -v {input.fastq} | gzip > {output.organelles_filtered} | ||
""" |