Skip to content

Commit 1830584

Browse files
committed
Add SNV variant scoring by MIVMIR, GICAM models
Signed-off-by: Tor Björgen <[email protected]>
1 parent f7b82a4 commit 1830584

File tree

10 files changed

+284
-18
lines changed

10 files changed

+284
-18
lines changed

assets/rank_model_mivmir.ini

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
[Version]
2+
version = 1.39
3+
name = rank_model_mivmir
4+
5+
[Categories]
6+
7+
[[inheritance_models]]
8+
category_aggregation = min
9+
10+
[[variant_call_quality_filter]]
11+
category_aggregation = sum
12+
13+
[model_score]
14+
category = variant_call_quality_filter
15+
data_type = integer
16+
description = Inheritance model score
17+
field = INFO
18+
info_key = ModelScore
19+
record_rule = min
20+
separators = ',',':',
21+
22+
[[not_reported]]
23+
score = 0
24+
25+
[[low_qual]]
26+
score = -5
27+
lower = 0
28+
upper = 10
29+
30+
[[medium_qual]]
31+
score = -2
32+
lower = 10
33+
upper = 20
34+
35+
[[high_qual]]
36+
score = 0
37+
lower = 20
38+
upper = 300
39+
40+
[genetic_models]
41+
category = inheritance_models
42+
data_type = string
43+
description = Inheritance models followed for the variant
44+
field = INFO
45+
info_key = GeneticModels
46+
record_rule = max
47+
separators = ',', ':', '|',
48+
49+
[[ad]]
50+
priority = 1
51+
score = 1
52+
string = 'AD'
53+
54+
[[ad_dn]]
55+
score = 1
56+
priority = 1
57+
string = 'AD_dn'
58+
59+
[[ar]]
60+
score = 1
61+
priority = 1
62+
string = 'AR_hom'
63+
64+
[[ar_dn]]
65+
score = 1
66+
priority = 1
67+
string = 'AR_hom_dn'
68+
69+
[[ar_comp]]
70+
score = 1
71+
priority = 1
72+
string = 'AR_comp'
73+
74+
[[ar_comp_dn]]
75+
score = 1
76+
priority = 1
77+
string = 'AR_comp_dn'
78+
79+
[[xr]]
80+
score = 1
81+
priority = 1
82+
string = 'XR'
83+
84+
[[xr_dn]]
85+
score = 1
86+
priority = 1
87+
string = 'XR_dn'
88+
89+
[[xd]]
90+
score = 1
91+
priority = 1
92+
string = 'XD'
93+
94+
[[xd_dn]]
95+
score = 1
96+
priority = 1
97+
string = 'XD_dn'
98+
99+
[[not_reported]]
100+
score = -12
101+
102+
[filter]
103+
category = variant_call_quality_filter
104+
data_type = string
105+
description = The filters for the variant
106+
field = FILTER
107+
record_rule = min
108+
separators = ';',
109+
110+
[[not_reported]]
111+
score = 0
112+
113+
[[pass]]
114+
score = 3
115+
priority = 1
116+
string = 'PASS'
117+
118+
[[dot]]
119+
score = 3
120+
priority = 2
121+
string = '.'

conf/modules/rank_variants.config

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,24 @@ process {
107107
]
108108
}
109109

110+
withName: '.*RANK_VARIANTS_SNV:TABIX_BGZIPTABIX_GICAM' {
111+
ext.prefix = { "${meta.id}_snv_ranked_gicam_${meta.set}" }
112+
}
113+
114+
withName: '.*RANK_VARIANTS_SNV:TABIX_BGZIP_GENMOD_GICAM' {
115+
ext.prefix = { "${meta.id}_snv_ranked_${meta.set}" }
116+
publishDir = [
117+
path: { "${params.outdir}/rank_and_filter" },
118+
mode: params.publish_dir_mode,
119+
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
120+
]
121+
}
122+
123+
withName: '.*RANK_VARIANTS_SNV:BCFTOOLS_MERGE_GENMOD_GICAM' {
124+
ext.args = { "--columns VrsModelPrediction,VrsModelExplanation,GICAM" }
125+
ext.prefix = { "${meta.id}_snv_ranked_${meta.set}" }
126+
}
127+
110128
withName: '.*RANK_VARIANTS_SNV:TABIX_TABIX' {
111129
publishDir = [
112130
path: { "${params.outdir}/rank_and_filter" },

conf/test.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ params {
5252
reduced_penetrance = params.pipelines_testdata_base_path + '/reference/reduced_penetrance.tsv'
5353
score_config_mt = params.pipelines_testdata_base_path + '/reference/rank_model_snv.ini'
5454
score_config_snv = params.pipelines_testdata_base_path + '/reference/rank_model_snv.ini'
55+
// Anchor to projectDir: a bare relative path is resolved against the launch directory, not the pipeline checkout
score_config_genmod_gicam_snv = "${projectDir}/assets/rank_model_mivmir.ini"
5556
score_config_sv = params.pipelines_testdata_base_path + '/reference/rank_model_sv.ini'
5657
svdb_query_dbs = params.pipelines_testdata_base_path + '/reference/svdb_querydb_files.csv'
5758
target_bed = params.pipelines_testdata_base_path + '/reference/target.bed'

conf/test_full.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ params {
5151
reduced_penetrance = params.pipelines_testdata_base_path + 'reference/reduced_penetrance.tsv'
5252
score_config_mt = params.pipelines_testdata_base_path + 'reference/rank_model_snv.ini'
5353
score_config_snv = params.pipelines_testdata_base_path + 'reference/rank_model_snv.ini'
54+
// Anchor to projectDir: a bare relative path is resolved against the launch directory, not the pipeline checkout
score_config_genmod_gicam_snv = "${projectDir}/assets/rank_model_mivmir.ini"
5455
score_config_sv = params.pipelines_testdata_base_path + 'reference/rank_model_sv.ini'
5556
svdb_query_dbs = params.pipelines_testdata_base_path + 'reference/svdb_querydb_files.csv'
5657
target_bed = params.pipelines_testdata_base_path + 'reference/target.bed'

modules/local/gicam/main.nf

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
process GICAM_INFER {
    // https://github.com/Clinical-Genomics/rdds/tree/master/src/rdds/gicam
    //
    // Runs `python3 -m rdds.gicam infer-vcf` on the input VCF and emits the
    // resulting `*-predictions.vcf` together with a versions.yml.
    //
    // input : tuple val(meta), path(input_vcf)
    // output: vcf      -> tuple val(meta), path('*-predictions.vcf')
    //         versions -> path "versions.yml"

    tag "${meta.id}"
    label 'process_single'

    container "docker.io/clinicalgenomics/rdds_mivmir:v1.12.0-rc3"

    // Scratch directory that is mounted at /rdds/tmp inside the container.
    // `-p` / `-f` keep setup and cleanup idempotent, so an already existing or
    // already removed directory does not fail the task.
    beforeScript "mkdir -p ${task.workDir}/rdds-tmp"
    afterScript "rm -rf ${task.workDir}/rdds-tmp"
    containerOptions {[
        // Apptainer accepts the same --bind syntax as Singularity, so handle both engines alike
        (workflow.containerEngine in ["singularity", "apptainer"]) ? "--bind ${task.workDir}/rdds-tmp:/rdds/tmp" : "",
        workflow.containerEngine.equals("docker") ? "--tmpfs /rdds/tmp" : "",
        ""
    ].minus("").join(" ")}

    input:
    tuple val(meta), path(input_vcf)

    output:
    tuple val(meta), path('*-predictions.vcf'), emit: vcf
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Tool version is hard-coded; keep it in sync with the container tag above
    def VERSION = 'v1.12.0-rc3'
    """
    . /opt/pyenv/bin/activate
    export PYTHONPATH=/rdds/src
    python3 -m rdds.gicam infer-vcf --cpu_cores ${task.cpus} ${input_vcf}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gicam: ${VERSION}
        python: \$(python --version | sed 's/Python //g')
    END_VERSIONS
    """
}

modules/local/mivmir/main.nf

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
process MIVMIR_INFER {
    // https://github.com/Clinical-Genomics/rdds/tree/master/src/rdds/variant_rank_score
    //
    // Runs `python3 -m rdds.variant_rank_score predict-on-vcf` on the input VCF
    // and emits the resulting `*-predictions.vcf` together with a versions.yml.
    //
    // input : tuple val(meta), path(input_vcf)
    // output: vcf      -> tuple val(meta), path('*-predictions.vcf')
    //         versions -> path "versions.yml"

    tag "${meta.id}"
    label 'process_single'

    container "docker.io/clinicalgenomics/rdds_mivmir:v1.12.0-rc3"

    // Scratch directory that is mounted at /rdds/tmp inside the container.
    // `-p` / `-f` keep setup and cleanup idempotent, so an already existing or
    // already removed directory does not fail the task.
    beforeScript "mkdir -p ${task.workDir}/rdds-tmp"
    afterScript "rm -rf ${task.workDir}/rdds-tmp"
    containerOptions {[
        // Apptainer accepts the same --bind syntax as Singularity, so handle both engines alike
        (workflow.containerEngine in ["singularity", "apptainer"]) ? "--bind ${task.workDir}/rdds-tmp:/rdds/tmp" : "",
        workflow.containerEngine.equals("docker") ? "--tmpfs /rdds/tmp" : "",
        ""
    ].minus("").join(" ")}

    input:
    tuple val(meta), path(input_vcf)

    output:
    tuple val(meta), path('*-predictions.vcf'), emit: vcf
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Tool version is hard-coded; keep it in sync with the container tag above
    def VERSION = 'v1.12.0-rc3'
    """
    . /opt/pyenv/bin/activate
    export PYTHONPATH=/rdds/src
    python3 -m rdds.variant_rank_score predict-on-vcf --cpu_cores ${task.cpus} ${input_vcf}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mivmir: ${VERSION}
        python: \$(python --version | sed 's/Python //g')
    END_VERSIONS
    """
}

nextflow.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ params {
6868
sequence_dictionary = null
6969
score_config_mt = null
7070
score_config_snv = null
71+
// Default to the rank model bundled with the pipeline; projectDir keeps the path valid from any launch directory
score_config_genmod_gicam_snv = "${projectDir}/assets/rank_model_mivmir.ini"
7172
score_config_sv = null
7273
sdf = null
7374
svdb_query_bedpedbs = null

nextflow_schema.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,13 @@
350350
"fa_icon": "fas fa-file",
351351
"description": "SV rank model config file for genmod."
352352
},
353+
"score_config_genmod_gicam_snv": {
354+
"type": "string",
355+
"exists": true,
356+
"format": "path",
357+
"fa_icon": "fas fa-file",
358+
"description": "Rank model config file for genmod in conjunction with MIVMIR-GICAM scoring."
359+
},
353360
"sdf": {
354361
"type": "string",
355362
"exists": true,

subworkflows/local/rank_variants.nf

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,29 @@
22
// A subworkflow to score and rank variants.
33
//
44

5-
include { GENMOD_ANNOTATE } from '../../modules/nf-core/genmod/annotate/main'
6-
include { GENMOD_MODELS } from '../../modules/nf-core/genmod/models/main'
7-
include { GENMOD_SCORE } from '../../modules/nf-core/genmod/score/main'
8-
include { GENMOD_COMPOUND } from '../../modules/nf-core/genmod/compound/main'
9-
include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main'
10-
include { TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main'
11-
include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main'
5+
include { GENMOD_ANNOTATE } from '../../modules/nf-core/genmod/annotate/main'
6+
include { GENMOD_MODELS } from '../../modules/nf-core/genmod/models/main'
7+
include { GENMOD_SCORE } from '../../modules/nf-core/genmod/score/main'
8+
include { GENMOD_SCORE as GENMOD_SCORE_FOR_GICAM } from '../../modules/nf-core/genmod/score/main'
9+
include { GENMOD_COMPOUND } from '../../modules/nf-core/genmod/compound/main'
10+
include { MIVMIR_INFER } from '../../modules/local/mivmir/main'
11+
include { GICAM_INFER } from '../../modules/local/gicam/main'
12+
include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main'
13+
include { TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main'
14+
include { TABIX_BGZIP as TABIX_BGZIP_GENMOD_GICAM } from '../../modules/nf-core/tabix/bgzip/main'
15+
include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_GICAM } from '../../modules/nf-core/tabix/bgziptabix/main'
16+
include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main'
17+
include { BCFTOOLS_ANNOTATE as BCFTOOLS_MERGE_GENMOD_GICAM } from '../../modules/nf-core/bcftools/annotate/main'
1218

1319
workflow RANK_VARIANTS {
1420

1521
take:
16-
ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
17-
ch_pedfile // channel: [mandatory] [ path(ped) ]
18-
ch_reduced_penetrance // channel: [mandatory] [ path(pentrance) ]
19-
ch_score_config // channel: [mandatory] [ path(ini) ]
22+
ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
23+
ch_pedfile // channel: [mandatory] [ path(ped) ]
24+
ch_reduced_penetrance // channel: [mandatory] [ path(penetrance) ]
25+
ch_score_config // channel: [mandatory] [ path(ini) ]
26+
ch_genmod_gicam_score_config // channel: [mandatory] [ path(ini) ]
27+
rank_with_mivmir_gicam // value
2028

2129
main:
2230
ch_versions = Channel.empty()
@@ -31,13 +39,34 @@ workflow RANK_VARIANTS {
3139

3240
GENMOD_SCORE(ch_score_in, ch_score_config)
3341

34-
GENMOD_COMPOUND(GENMOD_SCORE.out.vcf)
35-
36-
BCFTOOLS_SORT(GENMOD_COMPOUND.out.vcf) // SV file needs to be sorted before indexing
42+
// Run MIVMIR - GICAM scoring (not supported for MT SNVs and SVs)
43+
if (rank_with_mivmir_gicam) {
44+
GENMOD_SCORE_FOR_GICAM(ch_score_in, ch_genmod_gicam_score_config)
45+
MIVMIR_INFER(GENMOD_SCORE_FOR_GICAM.out.vcf)
46+
GICAM_INFER(MIVMIR_INFER.out.vcf)
47+
TABIX_BGZIPTABIX_GICAM(GICAM_INFER.out.vcf)
48+
}
3749

50+
GENMOD_COMPOUND(GENMOD_SCORE.out.vcf)
3851
TABIX_BGZIP(GENMOD_COMPOUND.out.vcf) //run only for SNVs
3952

40-
ch_vcf = TABIX_BGZIP.out.output.mix(BCFTOOLS_SORT.out.vcf)
53+
// Merge Genmod and MIVMIR-GICAM scores
54+
if (rank_with_mivmir_gicam) {
55+
TABIX_BGZIP.out.output
56+
.join(TABIX_BGZIPTABIX_GICAM.out.gz_tbi, failOnMismatch: true)
57+
.map {meta, vcf_genmod, vcf_gicam, vcf_index_gicam -> return [ meta, vcf_genmod, [], vcf_gicam, vcf_index_gicam ]}
58+
.set {ch_merge_genmod_gicam}
59+
BCFTOOLS_MERGE_GENMOD_GICAM(ch_merge_genmod_gicam, [])
60+
TABIX_BGZIP_GENMOD_GICAM(BCFTOOLS_MERGE_GENMOD_GICAM.out.vcf)
61+
}
62+
63+
BCFTOOLS_SORT(GENMOD_COMPOUND.out.vcf) // SV file needs to be sorted before indexing
64+
// Mix SNVs and SVs
65+
if (rank_with_mivmir_gicam) {
66+
ch_vcf = TABIX_BGZIP_GENMOD_GICAM.out.output.mix(BCFTOOLS_SORT.out.vcf)
67+
} else {
68+
ch_vcf = TABIX_BGZIP.out.output.mix(BCFTOOLS_SORT.out.vcf)
69+
}
4170

4271
TABIX_TABIX (ch_vcf)
4372

workflows/raredisease.nf

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,8 @@ workflow RAREDISEASE {
203203
: Channel.value([])
204204
ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect()
205205
: Channel.value([])
206+
ch_genmod_gicam_score_config = params.score_config_genmod_gicam_snv ? Channel.fromPath(params.score_config_genmod_gicam_snv).collect()
207+
: Channel.value([])
206208
ch_score_config_sv = params.score_config_sv ? Channel.fromPath(params.score_config_sv).collect()
207209
: Channel.value([])
208210
ch_sdf = params.sdf ? Channel.fromPath(params.sdf).map{it -> [[id:it.simpleName],it]}.collect()
@@ -547,7 +549,9 @@ workflow RAREDISEASE {
547549
ch_ranksnv_nuclear_in,
548550
ch_pedfile,
549551
ch_reduced_penetrance,
550-
ch_score_config_snv
552+
ch_score_config_snv,
553+
ch_genmod_gicam_score_config,
554+
true
551555
)
552556
ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions)
553557
}
@@ -615,7 +619,9 @@ workflow RAREDISEASE {
615619
ch_ranksnv_mt_in,
616620
ch_pedfile,
617621
ch_reduced_penetrance,
618-
ch_score_config_mt
622+
ch_score_config_mt,
623+
ch_genmod_gicam_score_config,
624+
false
619625
)
620626
ch_versions = ch_versions.mix(RANK_VARIANTS_MT.out.versions)
621627
}
@@ -706,7 +712,9 @@ workflow RAREDISEASE {
706712
ch_ranksnv_sv_in,
707713
ch_pedfile,
708714
ch_reduced_penetrance,
709-
ch_score_config_sv
715+
ch_score_config_sv,
716+
ch_genmod_gicam_score_config,
717+
false
710718
)
711719
ch_versions = ch_versions.mix(RANK_VARIANTS_SV.out.versions)
712720
}

0 commit comments

Comments
 (0)