JAFFA/JAFFAL.groovy at master · Oshlack/JAFFA · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/***********************************************************
 ** This is the JAFFA pipeline file for fusion detection
 ** with noisy long read data. For polished long read data,
 ** use JAFFA_direct.groovy. Run like so:
 **    bpipe run <path_to_this_file> <path_to_fastq/fasta_files>
 ** See our website for details	on running options:
 ** https://github.com/Oshlack/JAFFA/wiki.
 **
 ** Author: Nadia Davidson <davidson.n@wehi.edu.au>
 ** Last Update: 2025
 *********************************************************/

codeBase = file(bpipe.Config.config.script).parentFile.absolutePath
load codeBase+"/JAFFA_stages.groovy"


//different defaults for long-reads
reassign_dist=50
split_buffer=1 //reads are split at breakpoint+buffer before realignment to genome


get_fasta = {
   doc "Converting fastqs to fasta"
   output.dir=jaffa_output+"/"+branch
   produce(branch.toString()+".fasta"){
      exec "$reformat ignorebadquality=t in=$input out=$output threads=$threads ;"
   }
}

minimap2_transcriptome = {
   doc "Aligning candidates to transcriptome using minimap2"
   output.dir=jaffa_output+"/"+branch
   produce(branch.toString() +".paf", branch.toString() +".counts"){
        exec """
           $minimap2 -t $threads -x map-ont -c $transFasta $input > $output1 ;
	   cat $output1 | cut -f1,6 | $make_count_table $transTable $anno_prefix > $output2 ;
        """
   }
}

extract_left_right_sequence = {
   doc "Spliting fusion reads ready for realignment"
   output.dir=jaffa_output+"/"+branch
   produce(branch.toString() +".fusion.left.fa", branch.toString() +".fusion.right.fa"){
      exec """
          $split_fusion_reads $input.txt $input.fa $split_buffer $output1 $output2 ;
      """
      }
}

minimap2_genome = {
   doc "Aligning candidates to genome using minimap2"
   output.dir=jaffa_output+"/"+branch
   produce(branch.toString()+"_genome.paf",branch.toString()+"_genome.psl"){
	exec """
	   $minimap2 -t $threads -x splice --junc-bed $transBed -c --score-N 1 $genomeFasta $input1 > $output1;
	   $minimap2 -t $threads -x splice --junc-bed $transBed -c --score-N 1 $genomeFasta $input2 >> $output1;

	   grep \$'\\t+\\t' $output1 | awk -F'\\t' -v OFS="\\t" '{
	   	print \$4-\$3,0,0,0,0,0,0,0,\$5,\$1,\$2,\$3,\$4,\$6,\$7,\$8,\$9,2, 100","\$4-\$3-100",",
		\$3","\$3+100",", \$8","\$9-\$4+\$3+100","
	   }' > $output2 ;
	   grep \$'\\t-\\t' $output1 | awk -F'\\t' -v OFS="\\t" '{
	   	print \$4-\$3,0,0,0,0,0,0,0,\$5,\$1,\$2,\$3,\$4,\$6,\$7,\$8,\$9,2, 100","\$4-\$3-100",",
	   	\$2-\$4","\$2-\$4+100",", \$8","\$9-\$4+\$3+100","
	   }' >> $output2 ;
        """
   }
}

report_3_gene_fusions = {
   doc "Checking for reads that support multi-fusion transcripts"
   output.dir=jaffa_output+"/"+branch.toString()
   produce(branch.toString()+".3gene_summary",branch.toString()+".3gene_reads"){
      exec """
         $make_3_gene_fusion_table $input.summary $input.txt $output2 > $output1
      """
   }
}


readLayout="single"
fastqInputFormat="%.gz"

common_steps = segment {
   minimap2_transcriptome +
   filter_transcripts +
   extract_fusion_sequences +
   extract_left_right_sequence +
   minimap2_genome +
   make_fasta_reads_table +
   get_final_list +
   report_3_gene_fusions
}


// below is the pipeline for a fasta file
if(args[0].endsWith(fastaSuffix)) {
   run { run_check + fastaInputFormat * [
	    common_steps ] + compile_all_results
   }
} else { //or fastq.gz will be converted to fasta.
   run { run_check + fastqInputFormat * [
	    get_fasta + common_steps ] + compile_all_results
   }
}