-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparaphase.wdl
More file actions
316 lines (284 loc) · 8.16 KB
/
paraphase.wdl
File metadata and controls
316 lines (284 loc) · 8.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
version 1.0
# Top-level workflow: runs Paraphase on a mapped BAM (run_paraphase), then
# calls F8 inversions on the phased BAM that Paraphase produced (call_f8).
# All outputs of both tasks are re-exported as workflow outputs.
workflow paraphase {
  meta {
    title: "Paraphase workflow"
    summary: "Haplotype phasing, genotyping, and F8 inversion calling workflow"
    description: "A workflow for phasing haplotypes using Paraphase followed by genotyping and F8 inversion calling. The workflow performs targeted phasing of specific genomic regions and generates phased BAMs, VCFs, and JSON reports."
  }

  parameter_meta {
    # Inputs
    sample_name: {
      help: "Name of the sample being processed",
      label: "Sample name"
    }
    sex: {
      help: "Biological sex of the sample",
      label: "Sample sex",
      choices: ["M", "F"]
    }
    mapped_bam: {
      help: "Input BAM file containing aligned reads",
      label: "Mapped BAM"
    }
    mapped_bam_bai: {
      help: "Index file for the input BAM",
      label: "Mapped BAM index"
    }
    ref_fasta: {
      help: "Reference genome in FASTA format",
      label: "Reference FASTA"
    }
    ref_index: {
      help: "Index file for the reference FASTA",
      label: "Reference FASTA index"
    }
    config_file: {
      help: "Paraphase configuration file",
      label: "Paraphase config"
    }
    genome_version: {
      help: "Reference genome version",
      label: "Genome version"
    }
    annotation_vcf: {
      help: "Optional VCF file for havanno annotation of paraphase VCF outputs",
      label: "Paraphase annotation VCF"
    }
    docker_smrttools: {
      help: "Docker image used to run the Paraphase and F8 inversion tasks",
      label: "Docker image"
    }
    # Outputs
    paraphase_bam: {
      help: "Output phased BAM from Paraphase",
      label: "Phased BAM"
    }
    paraphase_bam_bai: {
      help: "Index for the phased BAM",
      label: "Phased BAM index"
    }
    paraphase_json: {
      help: "JSON report from Paraphase",
      label: "Paraphase report"
    }
    paraphase_vcfs: {
      help: "VCF files containing phased variants",
      label: "Phased VCFs"
    }
    f8_vcf: {
      help: "VCF containing F8 inversion calls",
      label: "F8 inversion VCF"
    }
    f8_json: {
      help: "JSON report for F8 inversion calls",
      label: "F8 inversion report"
    }
    havanno_json: {
      help: "Array of JSON files containing havanno annotations (optional)",
      label: "Paraphase havanno annotations"
    }
  }

  input {
    String sample_name
    String sex
    File mapped_bam
    File mapped_bam_bai
    File ref_fasta
    File ref_index
    File config_file
    String genome_version
    File? annotation_vcf
    String docker_smrttools
  }

  # Step 1: phase and genotype the sample with Paraphase. The optional
  # annotation VCF is forwarded as-is; the task decides whether to run havanno.
  call run_paraphase {
    input:
      sample_name = sample_name,
      mapped_bam = mapped_bam,
      mapped_bam_bai = mapped_bam_bai,
      ref_fasta = ref_fasta,
      ref_index = ref_index,
      config_file = config_file,
      genome_version = genome_version,
      annotation_vcf = annotation_vcf,
      docker_smrttools = docker_smrttools
  }

  # Step 2: call F8 inversions from the phased BAM produced in step 1.
  # The sample name doubles as the output file prefix.
  call call_f8 {
    input:
      sex = sex,
      paraphase_bam = run_paraphase.paraphase_bam,
      paraphase_bam_bai = run_paraphase.paraphase_bam_bai,
      out_prefix = sample_name,
      genome_version = genome_version,
      docker_smrttools = docker_smrttools
  }

  output {
    File paraphase_bam = run_paraphase.paraphase_bam
    File paraphase_bam_bai = run_paraphase.paraphase_bam_bai
    File paraphase_json = run_paraphase.paraphase_json
    Array[File] paraphase_vcfs = run_paraphase.paraphase_vcfs
    # Only present when an annotation VCF was supplied.
    File? havanno_json = run_paraphase.havanno_json
    File f8_vcf = call_f8.vcf
    File f8_json = call_f8.json
  }
}
# Runs Paraphase in targeted mode on one sample and, when an annotation VCF is
# supplied, runs havanno on the Paraphase output directory.
#
# Outputs are collected from "<sample_name>_paraphase/" using file names that
# start with sample_name, so the command passes --prefix explicitly to make the
# names Paraphase writes match the declared output paths.
task run_paraphase {
  meta {
    title: "Paraphase"
    summary: "Performs haplotype phasing and genotyping using Paraphase"
    description: "Uses Paraphase to phase haplotypes in targeted genomic regions. Generates phased BAM files, VCFs, and JSON reports."
  }

  parameter_meta {
    sample_name: {
      help: "Name of the sample being processed",
      label: "Sample name"
    }
    mapped_bam: {
      help: "Input BAM file containing aligned reads",
      label: "Input BAM"
    }
    mapped_bam_bai: {
      help: "Index file for the input BAM",
      label: "Input BAM index"
    }
    ref_fasta: {
      help: "Reference genome in FASTA format",
      label: "Reference FASTA"
    }
    ref_index: {
      help: "Index file for the reference FASTA",
      label: "Reference FASTA index"
    }
    config_file: {
      help: "Paraphase configuration file",
      label: "Paraphase config"
    }
    genome_version: {
      help: "Reference genome version",
      label: "Genome version"
    }
    annotation_vcf: {
      help: "Optional VCF file for havanno annotation",
      label: "Annotation VCF"
    }
    threads: {
      help: "Number of CPU threads to use (default: 4)",
      label: "CPU threads"
    }
    mem_gb: {
      help: "Memory allocation in gigabytes (default: 8)",
      label: "Memory (GB)"
    }
    docker_smrttools: {
      help: "Docker image in which the task command runs",
      label: "Docker image"
    }
  }

  input {
    String sample_name
    File mapped_bam
    File mapped_bam_bai
    File ref_fasta
    File ref_index
    File config_file
    String genome_version
    File? annotation_vcf
    Int threads = 4
    Int mem_gb = 8
    String docker_smrttools
  }

  # Directory Paraphase writes into; every declared output lives under it
  # except the havanno JSON, which is written to the working directory.
  String out_dir = "~{sample_name}_paraphase"

  # Disk request: twice the combined BAM + reference size plus 20 GB headroom.
  Int disk_size = ceil((size(mapped_bam, 'GB') + size(ref_fasta, 'GB')) * 2 + 20)

  # Paths and sample-derived names are single-quoted so that spaces or shell
  # metacharacters in them do not split or alter the arguments.
  # The havanno step is emitted only when annotation_vcf is defined; otherwise
  # the placeholder expands to an empty line.
  command <<<
    set -e

    paraphase \
      --bam '~{mapped_bam}' \
      --reference '~{ref_fasta}' \
      --out '~{out_dir}' \
      --prefix '~{sample_name}' \
      --genome '~{genome_version}' \
      --threads ~{threads} \
      --config '~{config_file}' \
      --write-nocalls-in-vcf \
      --targeted

    ~{if defined(annotation_vcf) then
      "havanno --variant-vcf '" + annotation_vcf + "' --paraphase-dir '" + out_dir + "' > '" + sample_name + ".havanno.json'"
      else ""}
  >>>

  output {
    File paraphase_bam = "~{out_dir}/~{sample_name}.paraphase.bam"
    File paraphase_bam_bai = "~{out_dir}/~{sample_name}.paraphase.bam.bai"
    File paraphase_json = "~{out_dir}/~{sample_name}.paraphase.json"
    Array[File] paraphase_vcfs = glob("~{out_dir}/~{sample_name}_paraphase_vcfs/*")
    # Exists only when the havanno step ran (annotation_vcf was supplied).
    File? havanno_json = "~{sample_name}.havanno.json"
  }

  runtime {
    docker: docker_smrttools
    cpu: threads
    memory: "~{mem_gb} GB"
    # Both keys carry the same disk request in two different formats.
    disk: disk_size + " GB"
    disks: "local-disk " + disk_size + " SSD"
  }
}
# Runs f8_inversion.py on a phased BAM and exposes the resulting VCF and JSON.
# The script writes into "out/"; both result files are then moved to the
# working directory, where the output section picks them up.
task call_f8 {
  meta {
    title: "F8 inversion caller"
    summary: "Calls F8 inversions from phased BAM"
    description: "Analyzes phased BAM files to detect and genotype F8 gene inversions, generates both VCF and JSON results."
  }

  parameter_meta {
    sex: {
      help: "Biological sex of the sample",
      label: "Sample sex",
      choices: ["M", "F"]
    }
    paraphase_bam: {
      help: "Input phased BAM file",
      label: "Phased BAM"
    }
    paraphase_bam_bai: {
      help: "Index for the phased BAM",
      label: "Phased BAM index"
    }
    out_prefix: {
      help: "Prefix for output files",
      label: "Output prefix"
    }
    genome_version: {
      help: "Reference genome version",
      label: "Genome version"
    }
    threads: {
      help: "Number of CPU threads to use (default: 1)",
      label: "CPU threads"
    }
    mem_gb: {
      help: "Memory allocation in gigabytes (default: 4)",
      label: "Memory (GB)"
    }
    docker_smrttools: {
      help: "Docker image in which the task command runs",
      label: "Docker image"
    }
  }

  input {
    String sex
    File paraphase_bam
    File paraphase_bam_bai
    String genome_version
    String out_prefix
    String docker_smrttools
    Int threads = 1
    Int mem_gb = 4
  }

  # Normalise the sex passed to the script: exactly "M" stays "M", and any
  # other value is treated as "F". `sex` is a required String, so no
  # select_first fallback is needed.
  String sample_sex = if sex == "M" then "M" else "F"

  # Disk request: twice the phased BAM size plus 10 GB headroom.
  Int disk_size = ceil(size(paraphase_bam, 'GB') * 2 + 10)

  # Paths and the prefix are single-quoted so that spaces or shell
  # metacharacters in them do not split or alter the arguments.
  command <<<
    set -e

    f8_inversion.py \
      --bam '~{paraphase_bam}' \
      --prefix '~{out_prefix}' \
      --genome '~{genome_version}' \
      --json \
      --sex ~{sample_sex} \
      --out out/

    mv 'out/~{out_prefix}.f8inversion.vcf' '~{out_prefix}.f8inversion.vcf'
    mv 'out/~{out_prefix}.f8inversion.json' '~{out_prefix}.f8inversion.json'
  >>>

  output {
    File vcf = "~{out_prefix}.f8inversion.vcf"
    File json = "~{out_prefix}.f8inversion.json"
  }

  runtime {
    docker: docker_smrttools
    cpu: threads
    memory: "~{mem_gb} GB"
    # Both keys carry the same disk request in two different formats.
    disk: disk_size + " GB"
    disks: "local-disk " + disk_size + " SSD"
  }
}