-
Notifications
You must be signed in to change notification settings - Fork 22
Expand file tree
/
Copy pathJAFFAL.groovy
More file actions
executable file
·106 lines (91 loc) · 3.47 KB
/
JAFFAL.groovy
File metadata and controls
executable file
·106 lines (91 loc) · 3.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/***********************************************************
** This is the JAFFA pipeline file for fusion detection
** with noisy long read data. For polished long read data,
** use JAFFA_direct.groovy. Run like so:
** bpipe run <path_to_this_file> <path_to_fastq/fasta_files>
** See our website for details on running options:
** https://github.com/Oshlack/JAFFA/wiki.
**
** Author: Nadia Davidson <davidson.n@wehi.edu.au>
** Last Update: 2025
*********************************************************/
codeBase = file(bpipe.Config.config.script).parentFile.absolutePath
load codeBase+"/JAFFA_stages.groovy"
//different defaults for long-reads
reassign_dist=50
split_buffer=1 //reads are split at breakpoint+buffer before realignment to genome
get_fasta = {
doc "Converting fastqs to fasta"
output.dir=jaffa_output+"/"+branch
produce(branch.toString()+".fasta"){
exec "$reformat ignorebadquality=t in=$input out=$output threads=$threads ;"
}
}
minimap2_transcriptome = {
doc "Aligning candidates to transcriptome using minimap2"
output.dir=jaffa_output+"/"+branch
produce(branch.toString() +".paf", branch.toString() +".counts"){
exec """
$minimap2 -t $threads -x map-ont -c $transFasta $input > $output1 ;
cat $output1 | cut -f1,6 | $make_count_table $transTable $anno_prefix > $output2 ;
"""
}
}
extract_left_right_sequence = {
doc "Spliting fusion reads ready for realignment"
output.dir=jaffa_output+"/"+branch
produce(branch.toString() +".fusion.left.fa", branch.toString() +".fusion.right.fa"){
exec """
$split_fusion_reads $input.txt $input.fa $split_buffer $output1 $output2 ;
"""
}
}
minimap2_genome = {
doc "Aligning candidates to genome using minimap2"
output.dir=jaffa_output+"/"+branch
produce(branch.toString()+"_genome.paf",branch.toString()+"_genome.psl"){
exec """
$minimap2 -t $threads -x splice --junc-bed $transBed -c --score-N 1 $genomeFasta $input1 > $output1;
$minimap2 -t $threads -x splice --junc-bed $transBed -c --score-N 1 $genomeFasta $input2 >> $output1;
grep \$'\\t+\\t' $output1 | awk -F'\\t' -v OFS="\\t" '{
print \$4-\$3,0,0,0,0,0,0,0,\$5,\$1,\$2,\$3,\$4,\$6,\$7,\$8,\$9,2, 100","\$4-\$3-100",",
\$3","\$3+100",", \$8","\$9-\$4+\$3+100","
}' > $output2 ;
grep \$'\\t-\\t' $output1 | awk -F'\\t' -v OFS="\\t" '{
print \$4-\$3,0,0,0,0,0,0,0,\$5,\$1,\$2,\$3,\$4,\$6,\$7,\$8,\$9,2, 100","\$4-\$3-100",",
\$2-\$4","\$2-\$4+100",", \$8","\$9-\$4+\$3+100","
}' >> $output2 ;
"""
}
}
report_3_gene_fusions = {
doc "Checking for reads that support multi-fusion transcripts"
output.dir=jaffa_output+"/"+branch.toString()
produce(branch.toString()+".3gene_summary",branch.toString()+".3gene_reads"){
exec """
$make_3_gene_fusion_table $input.summary $input.txt $output2 > $output1
"""
}
}
readLayout="single"
fastqInputFormat="%.gz"
common_steps = segment {
minimap2_transcriptome +
filter_transcripts +
extract_fusion_sequences +
extract_left_right_sequence +
minimap2_genome +
make_fasta_reads_table +
get_final_list +
report_3_gene_fusions
}
// below is the pipeline for a fasta file
if(args[0].endsWith(fastaSuffix)) {
run { run_check + fastaInputFormat * [
common_steps ] + compile_all_results
}
} else { //or fastq.gz will be converted to fasta.
run { run_check + fastqInputFormat * [
get_fasta + common_steps ] + compile_all_results
}
}