-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsars_cov2_reads.smk
More file actions
112 lines (104 loc) · 4.03 KB
/
sars_cov2_reads.smk
File metadata and controls
112 lines (104 loc) · 4.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
rule all:
input:
"bin/pbsim",
"bin/QSHMM-ONT-HQ.model",
"bin/ERRHMM-SEQUEL.model",
expand("output/sars-cov-2/reads/{haplo}/reads.fasta", haplo=glob_wildcards("data/sars-cov-2/haplotypes/sequence_{haplo}.fasta").haplo),
expand("output/sars-cov-2/rec_reads/{rec_haplo}/reads.fasta", rec_haplo=glob_wildcards("data/sars-cov-2/rec_haplos/sequence_r{rec_haplo}.fasta").rec_haplo),
shell:
"""
bash split_sars_fasta.sh
echo "Reads simulated"
"""
rule init_pbsim:
output:
haplos = "output/sars-cov-2/haplos.fa",
rec_haplos = "output/sars-cov-2/rec_haplos.fa"
conda: "envs/make_graph.yaml"
threads: 4
shell:
"""
rm -rf output/sars-cov-2/haplos.fa
rm -rf output/sars-cov-2/rec_haplos.fa
cat data/sars-cov-2/haplotypes/*.fasta >> {output.haplos}
cat data/sars-cov-2/rec_haplos/*.fasta >> {output.rec_haplos}
"""
rule install_pbsim:
output:
pbsim = "bin/pbsim",
method ="bin/QSHMM-ONT-HQ.model",
err = "bin/ERRHMM-SEQUEL.model"
shell:
"""
rm -rf pbsim3
git clone https://github.com/yukiteruono/pbsim3 --branch v3.0.4
cd pbsim3
autoreconf -i
./configure
make
cd ..
cp pbsim3/src/pbsim {output.pbsim}
cp pbsim3/data/QSHMM-ONT-HQ.model {output.method}
cp pbsim3/data/ERRHMM-SEQUEL.model {output.err}
rm -rf pbsim3
"""
rule generate_reads:
input:
haplos="output/sars-cov-2/haplos.fa",
pbsim="bin/pbsim"
output:
"output/sars-cov-2/sim/sd_00{haplo}.fastq",
shell:
"""
{input.pbsim} --strategy wgs --method qshmm --qshmm bin/QSHMM-ONT-HQ.model --length-min 25000 --length-max 28000 --length-mean 26500 --accuracy-mean .99 --hp-del-bias 10 --depth 100 --genome {input.haplos} --prefix output/sars-cov-2/sim/sd --difference-ratio 2:1:1 --seed 1234
"""
rule generate_rec_reads:
input:
haplos="output/sars-cov-2/rec_haplos.fa",
pbsim="bin/pbsim"
output:
"output/sars-cov-2/sim_rec/sd_00{rec_haplo}.fastq",
shell:
"""
{input.pbsim} --strategy wgs --method qshmm --qshmm bin/QSHMM-ONT-HQ.model --length-min 25000 --length-max 28000 --length-mean 26500 --accuracy-mean .99 --hp-del-bias 10 --depth 200 --genome {input.haplos} --prefix output/sars-cov-2/sim_rec/sd --difference-ratio 2:1:1 --seed 1234
"""
rule filter_reads:
input:
reads="output/sars-cov-2/sim/sd_00{haplo}.fastq",
pbsim="bin/pbsim",
haplos="data/sars-cov-2/haplotypes/sequence_{haplo}.fasta"
output:
"output/sars-cov-2/sim/filtered_sd_{haplo}_0001.fastq",
shell:
"""
{input.pbsim} --strategy wgs --method sample --sample {input.reads} --genome {input.haplos} --accuracy-min 0.975 --depth 100 --prefix output/sars-cov-2/sim/filtered_sd_{wildcards.haplo} --id-prefix S{wildcards.haplo}_ --seed 1234
"""
rule filter_reads_rec:
input:
reads="output/sars-cov-2/sim_rec/sd_00{rec_haplo}.fastq",
pbsim="bin/pbsim",
haplos="data/sars-cov-2/rec_haplos/sequence_r{rec_haplo}.fasta"
output:
"output/sars-cov-2/sim_rec/filtered_sd_{rec_haplo}_0001.fastq",
shell:
"""
{input.pbsim} --strategy wgs --method sample --sample {input.reads} --genome {input.haplos} --accuracy-min 0.975 --depth 200 --prefix output/sars-cov-2/sim_rec/filtered_sd_{wildcards.rec_haplo} --id-prefix S{wildcards.rec_haplo}_ --seed 1234
"""
rule convert_reads:
input:
"output/sars-cov-2/sim/filtered_sd_{haplo}_0001.fastq",
output:
"output/sars-cov-2/reads/{haplo}/reads.fasta"
shell:
"""
sed -n '1~4s/^@/>/p;2~4p' {input} > {output}
"""
rule convert_reads_rec:
input:
"output/sars-cov-2/sim_rec/filtered_sd_{rec_haplo}_0001.fastq",
output:
"output/sars-cov-2/rec_reads/{rec_haplo}/reads.fasta"
shell:
"""
sed -n '1~4s/^@/>/p;2~4p' {input} > {output}
"""