Skip to content

Commit 72eb90e

Browse files
authored
Merge pull request CCBR#73 from CCBR/qc_fix
Qc fix
2 parents fd3e663 + d590318 commit 72eb90e

12 files changed

Lines changed: 363 additions & 111 deletions

File tree

conf/base.config

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,14 @@ process {
6969
time = { check_max( 72.h * task.attempt, 'time' ) }
7070
}
7171
withName:bwamem2 {
72-
cpus = { check_max( 22 * task.attempt, 'cpus' ) }
73-
memory = { check_max( 220.GB * task.attempt, 'memory' ) }
74-
time = { check_max( 72.h * task.attempt, 'time' ) }
72+
cpus = { check_max( 20 * task.attempt, 'cpus' ) }
73+
memory = { check_max( 160.GB * task.attempt, 'memory' ) }
74+
time = { check_max( 120.h * task.attempt, 'time' ) }
75+
}
76+
withName: 'BWAMEM2_SPLIT' {
    // Per-chunk alignment: every resource request is multiplied by the attempt
    // number and passed through check_max with the matching limit name.
    time   = { check_max( 120.h * task.attempt, 'time' ) }
    memory = { check_max( 32.GB * task.attempt, 'memory' ) }
    cpus   = { check_max( 6 * task.attempt, 'cpus' ) }
}
7681
withName:'gridss_somatic|gridss_tonly' {
7782
cpus = { check_max( 8 * task.attempt, 'cpus' ) }

conf/biowulf.config

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,19 @@ process {
3737

3838
//Additional Process for BWAMEM2
3939
withName:bwamem2 {
40-
cpus = { check_max( 22 * task.attempt, 'cpus' ) }
41-
memory = { check_max( 220.GB * task.attempt, 'memory' ) }
42-
time = { check_max( 72.h * task.attempt, 'time' ) }
40+
cpus = { check_max( 20 * task.attempt, 'cpus' ) }
41+
memory = { check_max( 160.GB * task.attempt, 'memory' ) }
42+
time = { check_max( 120.h * task.attempt, 'time' ) }
4343
clusterOptions = ' --gres=lscratch:300 '
4444
}
4545

46+
withName: 'fastp' {
    // Scheduler option string requesting lscratch:400 as a generic resource.
    clusterOptions = ' --gres=lscratch:400 '
    // Resource requests scale with the attempt number and go through check_max.
    time   = { check_max( 8.h * task.attempt, 'time' ) }
    memory = { check_max( 36.GB * task.attempt, 'memory' ) }
    cpus   = { check_max( 6 * task.attempt, 'cpus' ) }
}
52+
4653
withName:"gridss_somatic|gridss_tonly" {
4754
cpus = { check_max( 8 * task.attempt, 'cpus' ) }
4855
memory = { check_max( 96.GB * task.attempt, 'memory' ) }

modules/local/bwamem/bwamem2.nf

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
// Reference path for the selected genome (params.genome), looked up in params.genomes.
// The processes in this file pass it to `bwa-mem2 mem` as the reference argument.
GENOMEREF = file(params.genomes[params.genome].genome)
2+
3+
process bwamem2 {
    // Aligns one sample's trimmed paired-end FASTQs with bwa-mem2, pipes the
    // alignments through samblaster, and writes a sorted BAM plus its index.
    //
    // Input : samplename, R1 trimmed FASTQ, R2 trimmed FASTQ
    // Output: samplename, <samplename>.bam, <samplename>.bam.bai
    container "${params.containers.logan}"
    tag { samplename }
    // Retry only on these exit statuses (presumably scheduler kills for memory or
    // wall time -- TODO confirm); anything else terminates the run.
    errorStrategy { task.exitStatus in [137,140,143] ? 'retry' : 'terminate' }
    maxRetries 2

    // NOTE(review): this closure returns null on the first attempt, so the first
    // attempt relies on a memory setting supplied elsewhere (e.g. a withName
    // selector in the config) -- confirm that one is always present.
    memory {
        if (task.attempt == 2) return '180 GB'
        else if (task.attempt == 3) return '200 GB'
    }

    input:
        tuple val(samplename),
            path("${samplename}.R1.trimmed.fastq.gz"),
            path("${samplename}.R2.trimmed.fastq.gz")

    output:
        tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bam.bai")

    script:
    // Half of the allotted CPUs go to samtools sort. Integer division (never
    // below 1) keeps `-@` a whole number even when task.cpus is odd, and `def`
    // keeps the variable local to this task.
    def sub_cpus = Math.max(1, task.cpus.intdiv(2))

    """
    # Pick the bwa-mem2 binary by CPU feature: AVX2, then SSE4.1, then the default.
    # (The AVX512 check is currently disabled.)
    #if lscpu | grep -q avx512; then
    #    BWA_BINARY="bwa-mem2.avx512bw"
    if lscpu | grep -q avx2; then
        BWA_BINARY="bwa-mem2.avx2"
    elif lscpu | grep -q sse4_1; then
        BWA_BINARY="bwa-mem2.sse41"
    else
        BWA_BINARY="bwa-mem2"
    fi

    mkdir -p tmp
    \$BWA_BINARY mem -M \
        -R '@RG\\tID:${samplename}\\tSM:${samplename}\\tPL:illumina\\tLB:${samplename}\\tPU:${samplename}\\tCN:hgsc\\tDS:wgs' \
        -t $task.cpus \
        ${GENOMEREF} \
        ${samplename}.R1.trimmed.fastq.gz ${samplename}.R2.trimmed.fastq.gz |\
    samblaster -M | \
    samtools sort -T tmp/ -@ $sub_cpus -m 10G - --write-index -o ${samplename}.bam##idx##${samplename}.bam.bai
    """

    stub:
    """
    touch ${samplename}.bam ${samplename}.bam.bai
    """
}
53+
54+
55+
process BWAMEM2_SPLIT {
    // Aligns one chunk of a sample's reads with bwa-mem2 and writes the
    // alignments as an unsorted BAM named <samplename>_<chunk>.bam.
    //
    // Input : samplename, reads (reads[0] and reads[1] are passed to bwa-mem2), chunk
    // Output: samplename, <samplename>_<chunk>.bam
    container "${params.containers.logan}"
    tag { samplename }
    // Retry only on these exit statuses (presumably scheduler kills for memory or
    // wall time -- TODO confirm); anything else terminates the run.
    errorStrategy { task.exitStatus in [137,140,143] ? 'retry' : 'terminate' }
    maxRetries 2

    // NOTE(review): this closure returns null on the first attempt, so the first
    // attempt relies on a memory setting supplied elsewhere (e.g. a withName
    // selector in the config) -- confirm that one is always present.
    memory {
        if (task.attempt == 2) return '48 GB'
        else if (task.attempt == 3) return '64 GB'
    }

    input:
        tuple val(samplename),
            path(reads), val(chunk)

    output:
        tuple val(samplename),
            path("${samplename}_${chunk}.bam")

    script:
    """
    # Pick the bwa-mem2 binary by CPU feature: AVX2, then SSE4.1, then the default.
    # (The AVX512 check is currently disabled.)
    #if lscpu | grep -q avx512; then
    #    BWA_BINARY="bwa-mem2.avx512bw"
    if lscpu | grep -q avx2; then
        BWA_BINARY="bwa-mem2.avx2"
    elif lscpu | grep -q sse4_1; then
        BWA_BINARY="bwa-mem2.sse41"
    else
        BWA_BINARY="bwa-mem2"
    fi

    \$BWA_BINARY mem -M \
        -R '@RG\\tID:${samplename}\\tSM:${samplename}\\tPL:illumina\\tLB:${samplename}\\tPU:${samplename}\\tCN:hgsc\\tDS:wgs' \
        -t $task.cpus \
        ${GENOMEREF} \
        ${reads[0]} ${reads[1]} | \
    samtools view - -b -o ${samplename}_${chunk}.bam

    """

    stub:
    """
    touch ${samplename}_${chunk}.bam
    """
}
103+
104+
105+
106+
107+
process COMBINE_ALIGNMENTS {
    // Concatenates a sample's per-chunk BAMs, runs the combined stream through
    // samblaster, and writes a sorted BAM plus its index.
    //
    // Input : samplename, bam (one or more chunk BAMs)
    // Output: bams -> samplename, <samplename>.bam, <samplename>.bam.bai
    //         <samplename>.metrics (samblaster's stderr log)
    container "${params.containers.logan}"
    label 'process_medium'

    input:
        tuple val(samplename), path(bam)

    output:
        tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bam.bai"), emit: bams
        path("${samplename}.metrics")

    script:
    // `bam` is a single path when only one chunk was staged; wrap it so the join
    // always operates on a list of files.
    def allbams = (bam instanceof Collection ? bam : [bam]).join(" ")
    // Integer division (never below 1) keeps `-@` a whole number for odd CPU
    // counts; `def` keeps both variables local to this task.
    def sub_cpus = Math.max(1, task.cpus.intdiv(2))

    """
    mkdir -p tmp
    # samtools cat emits BAM, while samblaster reads SAM (with header) from stdin,
    # so convert in the pipe instead of writing an intermediate file.
    # samblaster reports its statistics on stderr; capture them as the metrics file.
    samtools cat -@ $task.cpus $allbams | \
    samtools view -h - | \
    samblaster -M 2> ${samplename}.metrics | \
    samtools sort -T tmp/ -@ $sub_cpus -m 10G - --write-index -o ${samplename}.bam##idx##${samplename}.bam.bai

    """

    stub:
    """
    touch "${samplename}.bam" "${samplename}.bam.bai" "${samplename}.metrics"
    """

}

modules/local/fastp/fastp.nf

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
process fastp {
    // Runs fastp on one sample's paired-end FASTQs with paired-end adapter
    // detection enabled, writing trimmed reads plus JSON and HTML reports.
    //
    // Input : samplename, fqs (fqs[0] is passed as --in1, fqs[1] as --in2)
    // Output: samplename, R1 trimmed FASTQ, R2 trimmed FASTQ, fastp JSON, fastp HTML
    container "${params.containers.logan}"
    label 'process_medium'
    tag { samplename }

    input:
        tuple val(samplename), path(fqs)

    output:
        tuple val(samplename),
            path("${samplename}.R1.trimmed.fastq.gz"),
            path("${samplename}.R2.trimmed.fastq.gz"),
            path("${samplename}.fastp.json"),
            path("${samplename}.fastp.html")

    script:
    """
    fastp -w $task.cpus \
        --detect_adapter_for_pe \
        --in1 ${fqs[0]} \
        --in2 ${fqs[1]} \
        --out1 ${samplename}.R1.trimmed.fastq.gz \
        --out2 ${samplename}.R2.trimmed.fastq.gz \
        --json ${samplename}.fastp.json \
        --html ${samplename}.fastp.html
    """

    stub:
    """
    touch ${samplename}.R1.trimmed.fastq.gz
    touch ${samplename}.R2.trimmed.fastq.gz
    touch ${samplename}.fastp.json
    touch ${samplename}.fastp.html

    """
}
37+
38+
process fastp_split {
    // Runs fastp on one sample's paired-end FASTQs with output splitting enabled
    // (-S params.split_fastq), so the trimmed reads are written as multiple files.
    //
    // Input : samplename, fqs (fqs[0] is passed as --in1, fqs[1] as --in2)
    // Output: samplename, every file matching *_R{1,2}.trimmed.fastq.gz,
    //         fastp JSON, fastp HTML
    container "${params.containers.logan}"
    label 'process_medium'
    tag { samplename }

    input:
        tuple val(samplename), path(fqs)

    output:
        tuple val(samplename), path("*_R{1,2}.trimmed.fastq.gz"),
            path("${samplename}.fastp.json"),
            path("${samplename}.fastp.html")

    script:
    """
    fastp -w $task.cpus \
        --detect_adapter_for_pe \
        -S $params.split_fastq \
        --in1 ${fqs[0]} \
        --in2 ${fqs[1]} \
        --out1 ${samplename}_R1.trimmed.fastq.gz \
        --out2 ${samplename}_R2.trimmed.fastq.gz \
        --json ${samplename}.fastp.json \
        --html ${samplename}.fastp.html
    """

    stub:
    // The stub mimics a single split file; the numeric prefix keeps the name
    // inside the *_R{1,2}.trimmed.fastq.gz output glob.
    """
    touch 0001.${samplename}_R1.trimmed.fastq.gz
    touch 0001.${samplename}_R2.trimmed.fastq.gz
    touch ${samplename}.fastp.json
    touch ${samplename}.fastp.html

    """
}

modules/local/fastq_screen.nf

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@ process fastq_screen {
88
input:
99
tuple val(samplename),
1010
path("${samplename}.R1.trimmed.fastq.gz"),
11-
path("${samplename}.R2.trimmed.fastq.gz"),
12-
path("${samplename}.fastp.json"),
13-
path("${samplename}.fastp.html")
11+
path("${samplename}.R2.trimmed.fastq.gz")
1412

1513
output:
1614
tuple path("${samplename}.R1.trimmed_screen.html"),

modules/local/fastqc.nf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,16 @@ process fastqc {
2121

2222
"""
2323
mkdir -p fastqc
24-
fastqc -t 8 \
24+
fastqc -t $task.cpus \
2525
-f bam \
2626
-o fastqc \
27-
$bam
28-
mv fastqc/${samplename}_fastqc.html ${samplename}_fastqc.html
29-
mv fastqc/${samplename}_fastqc.zip ${samplename}_fastqc.zip
27+
${bam}
28+
mv fastqc/${bam.BaseName}_fastqc.html ${samplename}_fastqc.html
29+
mv fastqc/${bam.BaseName}_fastqc.zip ${samplename}_fastqc.zip
3030
"""
3131

3232
stub:
3333
"""
34-
touch ${samplename}_fastqc.html ${samplename}_fastqc.zip
34+
touch ${samplename}_fastqc.html ${samplename}_fastqc.zip
3535
"""
3636
}

modules/local/lancet2/lancet2.nf

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// Reference path for the selected genome (params.genome), looked up in params.genomes.
// lancet2_tn passes it to Lancet2 through --reference.
GENOMEREF=file(params.genomes[params.genome].genome)
2+
3+
process lancet2_tn {
    // Runs the Lancet2 pipeline on a tumor/normal BAM pair restricted to one BED
    // file, scores the calls with score_variants.py, then compresses and indexes
    // the scored VCF.
    //
    // Input : tumorname, tumor BAM, tumor index, normalname, normal BAM,
    //         normal index, bed
    // Output: tumorname, normalname and six VCF-related paths (see NOTE below)
    container "${params.containers.lancet}"
    label 'process_somaticcaller'
    // Failures of this process are ignored rather than stopping the run.
    errorStrategy 'ignore'

    input:
        tuple val(tumorname), path(tumor), path(tumorbai),
            val(normalname), path(normal), path(normalbai), path(bed)

    // NOTE(review): none of the paths declared here are written by the script
    // section, which only produces *_temp.vcf.gz, *_scored.vcf, *_lancet.vcf.gz
    // and the index of the latter. Together with errorStrategy 'ignore', a real
    // run would fail output collection without stopping the pipeline. The
    // declaration is left unchanged here so the emitted tuple shape stays
    // stable -- confirm the intended outputs and align the script with them.
    output:
        tuple val(tumorname), val(normalname),
            path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz"),
            path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"),
            path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"),
            path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"),
            path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz"),
            path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi")

    script:

    """
    Lancet2 pipeline \
        --normal ${normal} \
        --tumor ${tumor} \
        --reference $GENOMEREF \
        --num-threads $task.cpus -R ${bed} \
        --out-vcfgz ${tumorname}_vs_${normalname}_${bed.simpleName}_temp.vcf.gz

    python3 score_variants.py \
        ${tumorname}_vs_${normalname}_${bed.simpleName}_temp.vcf.gz somatic_ebm.lancet_6ef7ba445a.v1.pkl > ${tumorname}_vs_${normalname}_${bed.simpleName}_scored.vcf

    bcftools view ${tumorname}_vs_${normalname}_${bed.simpleName}_scored.vcf -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lancet.vcf.gz
    bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}_lancet.vcf.gz

    """

    stub:
    // Create every path named in the output declaration so stub runs can
    // complete output collection.
    """
    touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz"
    touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"
    touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"
    touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"
    touch "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi"

    """
}
51+
52+

modules/local/qc.nf

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,7 @@ process fastq_screen {
5151

5252
input:
5353
tuple val(samplename),
54-
path("${samplename}.R1.trimmed.fastq.gz"),
55-
path("${samplename}.R2.trimmed.fastq.gz"),
56-
path("${samplename}.fastp.json"),
57-
path("${samplename}.fastp.html")
54+
path(fqs)
5855

5956
output:
6057
tuple path("${samplename}.R1.trimmed_screen.html"),
@@ -74,7 +71,7 @@ process fastq_screen {
7471
--subset 1000000 \
7572
--aligner bowtie2 \
7673
--force \
77-
${samplename}.R1.trimmed.fastq.gz ${samplename}.R2.trimmed.fastq.gz
74+
${fqs[0]} ${fqs[1]}
7875
7976
"""
8077

0 commit comments

Comments
 (0)