-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathsettings.cfg
277 lines (202 loc) · 10.7 KB
/
settings.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
#**********************************************
#* Configuration file for HybPhyloMaker *
#* https://github.com/tomas-fer/HybPhyloMaker *
#* Tomas Fer, 2024 *
#* [email protected] *
#* v.1.8.0i *
#**********************************************
#--------------------------------------------------------------------------------------------------------------------
# **** GENERAL SETTINGS ****
#--------------------------------------------------------------------------------------------------------------------
# Location where the analysis runs: local (0), MetaCentrum (1) or Hydra (2)
location="1"
# If on MetaCentrum select server (brno2, praha1, plzen1, budejovice1, brno6, brno3-cerit, brno9-ceitec, ostrava1)
server=brno2
#Where is your data folder
data=testdata
#File for adapter removal (must be in HybSeqSource)
adapterfile=NEBNext-PE.fa
#--------------------------------------------------------------------------------------------------------------------
# **** GENE TREE SETTINGS ****
#--------------------------------------------------------------------------------------------------------------------
#Tree-building method (RAxML or FastTree)
tree=RAxML
#Bootstrap FastTree (yes/no). no=normal FastTree with local branch support, very fast; yes=FastTree is applied to each of 100 bootstrap replicates, slow
FastTreeBoot=no
#Standard or rapid bootstrapping in RAxML (standard or rapid)
raxmlboot=rapid
#Number of bootstrap replicates for RAxML gene trees
bsrep=100
#Use bootstopping strategy for RAxML gene trees, i.e., bootstrapping until convergence (yes/no)
bootstop=no
#Evolutionary model for RAxML (GTRGAMMA, GTRGAMMAI, GTRCAT, GTRCATI)
#NOTE: Do not use CAT model for datasets with fewer than 50 taxa! Do not combine GTRCAT and rapid bootstrapping!
model=GTRGAMMA
#Use 'no', 'by exon' or 'by codon and exon' partitioning when building gene trees with RAxML (no, exon, codon)
#Codon is only for the data with corrected reading frame!
genetreepart=exon
#Outgroup (as it appears in RAxML files)
OUTGROUP="Siphonochilus-aethiopicus_S130"
#--------------------------------------------------------------------------------------------------------------------
# **** SPECIES TREE SETTINGS ****
#--------------------------------------------------------------------------------------------------------------------
#Bootstrap ExaML (yes/no)
examlboot=no
#Multilocus bootstrap for Astral and Astrid trees (yes/no)
mlbs=no
#Combine support values from main, bootstrap and bootstrap consensus trees to one tree (for Astral and Astrid trees)
combine=no
#Collapse trees for ASTRAL (integer, 0-99), set to '0' if no collapsing is requested
collapse=0
#Calculate ASTRAL tree with local posterior probabilities of three alternative hypotheses using '-t 4' (yes/no)
astralt4=yes
#Work only with trees with requisite taxa present (yes/no)
requisite=no
#List of requisite taxa (write taxon names separated by "|", the whole expression must be within quotes!)
requisitetaxa="Siphonochilus"
#Number of MCMC steps for BUCKy
nrbucky=1000000
#Number of runs for BUCKy (parameter '-k')
nrruns=2
#Number of chains for BUCKy (parameter '-c')
nrchains=2
#Parameter 'alpha' for BUCKy (parameter '-a')
alpha=1
#Parameter '-s' for PhyParts (cut-off for bootstrap support in gene trees)
phypartsbs=0.5
#Colours for PhyParts pie charts (four named colours separated by spaces, the whole expression must be within quotes)
#see https://en.wikipedia.org/wiki/Web_colors for colour names
#leave empty for original colours
ppcolors="blue green red silver"
#Get only first XXX gene trees for PhyParts (leave empty if all trees should be used)
nrpptrees=
#Species tree for Quartet Sampling (Astral, FastTree or ExaML)
qstree=FastTree
#Constraint tree for ExaML. The tree should be in HybSeqSource folder and needs to contain all taxa of the alignment!
constrtree=constrtree.tre
#SNP thinning for Dsuite (first, random or thinning)
SNPs=thinning
#hstart for SNaQ/PhyloNet. For SNaQ this should be set to '0' unless further implemented!
hstart=0
#hmax for SNaQ/PhyloNet. Maximum number of hybrid nodes.
hmax=5
#Number of runs of the PhyloNet search (parameter '-x')
numruns=10
#--------------------------------------------------------------------------------------------------------------------
# **** MISSING DATA SETTINGS ****
#--------------------------------------------------------------------------------------------------------------------
#Delete species with more than this percentage of missing data
MISSINGPERCENT=70
#Only include loci with at least this percentage of species present
SPECIESPRESENCE=75
#Removing gap and 'n' only positions in exon alignments using trimAl (yes/no)
noallgaps=no
#Trimming exon alignments using trimAl gappyout (yes/no)
gappyout=no
#--------------------------------------------------------------------------------------------------------------------
# **** TYPE OF DATA ****
#--------------------------------------------------------------------------------------------------------------------
#Working with cpDNA (yes/no)
cp=no
#Working with updated list of genes (yes/no)
update=no
#Working with corrected reading frame for exons/genes (yes/no)
corrected=no
#Working with data trimmed by trimAl (yes/no)
trimmed=no
#Maximum number of stop codons allowed per alignment (i.e., considered as errors)
maxstop=5
#Name of the sample selection folder (to be created with script 12)
selection=
#--------------------------------------------------------------------------------------------------------------------
# **** REFERENCE FILES ****
#--------------------------------------------------------------------------------------------------------------------
#Number of Ns to separate exons in pseudoreference (400 is recommended for 2x150 bp reads and 800 for 2x250 bp reads)
nrns=400
#File name with exonic probe sequences (must be stored in HybSeqSource folder)
probes=curcuma_HybSeqProbes_test.fa
#Minimum sequence identity between probe and sample (default is 90) - used in BLAT
minident=90
#File name with cpDNA CDS sequences (must be stored in HybSeqSource folder)
cpDNACDS=Curcuma-roscoeana_plastomeCDS_test.fa
#File name with full plastome cpDNA reference (one IR should be removed)
cpDNA=
#File name with full plastome cpDNA (GenBank flat file) (must be stored in HybSeqSource folder)
cpGBfile=
#GenBank accession number of full plastome cpDNA record (e.g. KR967361)
cpGBnr=
#Minimum sequence length in plastome reference (to be generated by script 0f)
#mincplength=200
#--------------------------------------------------------------------------------------------------------------------
# **** PATH TO DATA ****
#--------------------------------------------------------------------------------------------------------------------
#path to other transcriptomes/genomes to combine (NO if no other data sources available)
othersource=NO
#path to other pslx files to combine
otherpslx=pslx_to_combine
#path to other cpDNA pslx files to combine
otherpslxcp=pslx_cpDNA_to_combine
#--------------------------------------------------------------------------------------------------------------------
# **** SOFTWARE BINARIES AND NUMBER OF CORES ****
#--------------------------------------------------------------------------------------------------------------------
#binary name for sequential version of RAxML (raxmlHPC, raxmlHPC-SSE3, or raxmlHPC-AVX)
raxmlseq=raxmlHPC-SSE3
#binary name for Pthreads version of RAxML (raxmlHPC-PTHREADS, raxmlHPC-PTHREADS-SSE3, or raxmlHPC-PTHREADS-AVX)
raxmlpthreads=raxmlHPC-PTHREADS-SSE3
#binary name for FastTree
fasttreebin=FastTree
#java file for ASTRAL (e.g., astral.5.7.7.jar - this file must be in HybSeqSource together with lib folder - see Astral homepage)
astraljar=astral.5.7.7.jar
#binary name for ASTRID (ASTRID, ASTRID-linux, or ASTRID-osx) - must be in HybSeqSource
astridbin=ASTRID
#binary name for ExaML (examl, examl-AVX, or examl-OMP-AVX)
examlbin=examl
#number of cores/threads available (not applicable for clusters where number of cores is set using PBS and passed through env variables)
numbcores=2
#--------------------------------------------------------------------------------------------------------------------
# **** PARALLELIZATION SETTINGS ****
#--------------------------------------------------------------------------------------------------------------------
#parallel MAFFT (yes or no)
parallelmafft=yes
#parallel RAxML (yes or no)
parallelraxml=no
#how many RAxML calculations will be run per single submitted job (number of jobs = number of genes / raxmlperjob)
raxmlperjob=10
#--------------------------------------------------------------------------------------------------------------------
# **** MAPPING AND CONSENSUS SETTINGS, HETEROZYGOSITY ****
#--------------------------------------------------------------------------------------------------------------------
#mapping method (bowtie2/bwa)
mappingmethod=bwa
#whether read mapping should be done (yes/no); the mapper (bowtie2 or bwa) is selected by 'mappingmethod'
mapping=yes
#consensus calling software (kindel/ococo/consensusfixer); kindel/ococo for majority base, consensusfixer for ambiguities
conscall=kindel
#minimum site coverage for SNP calling (N will be in consensus for sites with lower coverage)
mincov=2
#majority threshold for consensus calling (0-1) - not working in OCOCO!
majthres=0.51
#minimal relative frequency of alternative base to call as ambiguity (only for ConsensusFixer)
plurality=0.3
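#calculate number of heterozygous sites per exon (yes/no)
#NOTE(review): the variable name 'nohetcalculation' reads as the opposite of this description - confirm in the scripts which value enables the calculation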
nohetcalculation=yes
#maximum number of heterozygous sites per exon to include it
maxhet=4
#--------------------------------------------------------------------------------------------------------------------
# **** DATA DOWNLOAD SETTINGS **** (currently not working due to change on Illumina BaseSpace web page!!!)
#--------------------------------------------------------------------------------------------------------------------
#Download samples from Illumina BaseSpace (yes/no)
download=no
#ID of the Illumina BaseSpace project from which the samples should be downloaded
projectID=
#ID of the Illumina BaseSpace run (only if a basic run statistics is required)
runID=
#Illumina BaseSpace API server (see https://developer.basespace.illumina.com/docs/content/documentation/cli/cli-overview#SpecifyAPIserverandAccessToken)
#bsserver=https://api.euc1.sh.basespace.illumina.com
bsserver=https://api.basespace.illumina.com
#--------------------------------------------------------------------------------------------------------------------
# **** OTHER SETTINGS ****
#--------------------------------------------------------------------------------------------------------------------
#using AMAS (fast/slow); try 'slow' only if you observe 'Argument list too long' error
AMAS=fast
#number of exons for generating test dataset (script 0d)
nrexons=958 #this is equivalent to 250 loci for Curcuma probes