// Reference genome for the selected `params.genome`, looked up in the
// `params.genomes` table. The alignment processes in this file pass it to
// `bwa-mem2 mem` as the reference argument.
// Deliberately assigned without `def` so it stays visible to the processes below.
GENOMEREF = file(params.genomes[params.genome].genome)
2+
/*
 * Align one sample's trimmed paired-end reads with bwa-mem2, pipe the
 * alignments through samblaster, and write a sorted BAM plus its index.
 *
 * Input:  tuple (samplename, R1 FASTQ, R2 FASTQ); the read files are staged as
 *         <samplename>.R1.trimmed.fastq.gz and <samplename>.R2.trimmed.fastq.gz.
 * Output: tuple (samplename, <samplename>.bam, <samplename>.bam.bai).
 */
process bwamem2 {
    container "${params.containers.logan}"
    // Tag tasks with the sample being aligned (the input variable is `samplename`).
    tag { samplename }
    // Retry only when the task exits with 137, 140 or 143; any other failure stops the run.
    errorStrategy { task.exitStatus in [137, 140, 143] ? 'retry' : 'terminate' }
    maxRetries 2

    // Memory is escalated on retries only.
    // NOTE(review): no value is returned on the first attempt, so attempt 1
    // presumably falls back to memory configured elsewhere -- confirm.
    memory {
        if (task.attempt == 2) return '180 GB'
        else if (task.attempt == 3) return '200 GB'
        return null
    }

    input:
        tuple val(samplename),
            path("${samplename}.R1.trimmed.fastq.gz"),
            path("${samplename}.R2.trimmed.fastq.gz")

    output:
        tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bam.bai")

    script:
    // Half of the task CPUs go to the sort step. `intdiv` keeps the result an
    // integer (Groovy's `/` would give 2.5 for 5 CPUs); `def` keeps the variable
    // local to this task instead of a script-wide global.
    def sub_cpus = Math.max(1, (task.cpus as int).intdiv(2))

    """
    # Check for AVX512 then AVX2 then SSE41
    #if lscpu | grep -q avx512; then
    #    BWA_BINARY="bwa-mem2.avx512bw"
    if lscpu | grep -q avx2; then
        BWA_BINARY="bwa-mem2.avx2"
    elif lscpu | grep -q sse4_1; then
        BWA_BINARY="bwa-mem2.sse41"
    else
        BWA_BINARY="bwa-mem2"
    fi

    mkdir -p tmp

    # Enabled only after the CPU probing above (grep -q may close the pipe early):
    # a failure of any stage of the alignment pipeline must fail the task so the
    # exit-code based retry rule can see it.
    set -o pipefail

    \$BWA_BINARY mem -M \
        -R '@RG\\tID:${samplename}\\tSM:${samplename}\\tPL:illumina\\tLB:${samplename}\\tPU:${samplename}\\tCN:hgsc\\tDS:wgs' \
        -t ${task.cpus} \
        ${GENOMEREF} \
        ${samplename}.R1.trimmed.fastq.gz ${samplename}.R2.trimmed.fastq.gz | \
    samblaster -M | \
    samtools sort -T tmp/ -@ ${sub_cpus} -m 10G - --write-index -o ${samplename}.bam##idx##${samplename}.bam.bai
    """

    stub:
    """
    touch ${samplename}.bam ${samplename}.bam.bai
    """
}
53+
54+
/*
 * Align one chunk of a sample's paired-end reads with bwa-mem2 and write the
 * alignments, unsorted, as a BAM file.
 *
 * Input:  tuple (samplename, reads, chunk); `reads` is indexed as
 *         reads[0] / reads[1] for the two mates, `chunk` labels the output.
 * Output: tuple (samplename, <samplename>_<chunk>.bam).
 */
process BWAMEM2_SPLIT {
    container "${params.containers.logan}"
    // Tag tasks with the sample being aligned (the input variable is `samplename`).
    tag { samplename }
    // Retry only when the task exits with 137, 140 or 143; any other failure stops the run.
    errorStrategy { task.exitStatus in [137, 140, 143] ? 'retry' : 'terminate' }
    maxRetries 2

    // Memory is escalated on retries only.
    // NOTE(review): no value is returned on the first attempt, so attempt 1
    // presumably falls back to memory configured elsewhere -- confirm.
    memory {
        if (task.attempt == 2) return '48 GB'
        else if (task.attempt == 3) return '64 GB'
        return null
    }

    input:
        tuple val(samplename),
            path(reads), val(chunk)

    output:
        tuple val(samplename),
            path("${samplename}_${chunk}.bam")

    script:
    """
    # Check for AVX512 then AVX2 then SSE41
    #if lscpu | grep -q avx512; then
    #    BWA_BINARY="bwa-mem2.avx512bw"
    if lscpu | grep -q avx2; then
        BWA_BINARY="bwa-mem2.avx2"
    elif lscpu | grep -q sse4_1; then
        BWA_BINARY="bwa-mem2.sse41"
    else
        BWA_BINARY="bwa-mem2"
    fi

    # Enabled only after the CPU probing above (grep -q may close the pipe early):
    # an aligner failure must fail the task so the exit-code based retry rule
    # can see it instead of being masked by samtools view.
    set -o pipefail

    \$BWA_BINARY mem -M \
        -R '@RG\\tID:${samplename}\\tSM:${samplename}\\tPL:illumina\\tLB:${samplename}\\tPU:${samplename}\\tCN:hgsc\\tDS:wgs' \
        -t ${task.cpus} \
        ${GENOMEREF} \
        ${reads[0]} ${reads[1]} | \
    samtools view - -b -o ${samplename}_${chunk}.bam
    """

    stub:
    """
    touch ${samplename}_${chunk}.bam
    """
}
103+
104+
105+
106+
/*
 * Merge the per-chunk BAMs of one sample, run samblaster over the combined
 * alignments, and write a sorted BAM plus its index.
 *
 * Input:  tuple (samplename, bam) where `bam` is one or more chunk BAM files.
 * Output: `bams`: tuple (samplename, <samplename>.bam, <samplename>.bam.bai)
 *         and <samplename>.metrics (samblaster's stderr report).
 */
process COMBINE_ALIGNMENTS {
    container "${params.containers.logan}"
    label 'process_medium'

    input:
        tuple val(samplename), path(bam)

    output:
        tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bam.bai"), emit: bams
        path("${samplename}.metrics")

    script:
    // A single staged file arrives as a lone path rather than a list; wrap it so
    // join() does not iterate over the components of that path.
    def allbams = (bam instanceof Collection ? bam : [bam]).join(' ')
    // Half of the task CPUs go to the sort step. `intdiv` keeps the result an
    // integer (Groovy's `/` would give 2.5 for 5 CPUs); `def` keeps the variable
    // local to this task instead of a script-wide global.
    def sub_cpus = Math.max(1, (task.cpus as int).intdiv(2))

    """
    mkdir -p tmp

    # Fail the task if any stage of the pipeline fails.
    set -o pipefail

    # samtools cat emits BAM, while samblaster reads SAM text on stdin, so the
    # stream is decoded with "samtools view -h" (header kept). samblaster's -M
    # flag takes no argument; its report goes to stderr, which is captured as
    # the declared metrics output.
    samtools cat -@ ${task.cpus} ${allbams} | \
    samtools view -h - | \
    samblaster -M 2> ${samplename}.metrics | \
    samtools sort -T tmp/ -@ ${sub_cpus} -m 10G - --write-index -o ${samplename}.bam##idx##${samplename}.bam.bai
    """

    stub:
    // Every declared output, including the metrics file, must exist for stub runs.
    """
    touch "${samplename}.bam" "${samplename}.bam.bai" "${samplename}.metrics"
    """
}
0 commit comments