@@ -60,7 +60,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
60
60
61
61
if [ ! -f data/fbank/.thchs30.done ]; then
62
62
mkdir -p data/fbank
63
- ./local/compute_fbank_thchs30.py
63
+ ./local/compute_fbank_thchs30.py --speed-perturb true
64
64
touch data/fbank/.thchs30.done
65
65
fi
66
66
fi
@@ -137,7 +137,7 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
137
137
138
138
if [ ! -f data/fbank/.stcmds.done ]; then
139
139
mkdir -p data/fbank
140
- ./local/compute_fbank_stcmds.py
140
+ ./local/compute_fbank_stcmds.py --speed-perturb true
141
141
touch data/fbank/.stcmds.done
142
142
fi
143
143
fi
@@ -151,15 +151,15 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
151
151
lhotse download primewords $dl_dir/primewords
152
152
fi
153
153
154
- if [ ! -f data/manifests/.stcmds.done ]; then
154
+ if [ ! -f data/manifests/.primewords.done ]; then
155
155
mkdir -p data/manifests
156
- lhotse prepare stcmds $dl_dir/primewords data/manifests/primewords
156
+ lhotse prepare primewords $dl_dir/primewords data/manifests/primewords
157
157
touch data/manifests/.primewords.done
158
158
fi
159
159
160
160
if [ ! -f data/fbank/.primewords.done ]; then
161
161
mkdir -p data/fbank
162
- ./local/compute_fbank_primewords.py
162
+ ./local/compute_fbank_primewords.py --speed-perturb true
163
163
touch data/fbank/.primewords.done
164
164
fi
165
165
fi
@@ -180,7 +180,7 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
180
180
181
181
if [ ! -f data/fbank/.magicdata.done ]; then
182
182
mkdir -p data/fbank
183
- ./local/compute_fbank_magicdata.py
183
+ ./local/compute_fbank_magicdata.py --speed-perturb true
184
184
touch data/fbank/.magicdata.done
185
185
fi
186
186
fi
@@ -291,10 +291,10 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
291
291
fi
292
292
293
293
log " Compute KeSpeech fbank for train_phase1"
294
- ./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1
294
+ ./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1
295
295
296
296
log " Compute KeSpeech fbank for train_phase2"
297
- ./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2
297
+ ./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2
298
298
299
299
log " Compute KeSpeech fbank for test/dev"
300
300
./local/compute_fbank_kespeech_dev_test.py
@@ -344,10 +344,10 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
344
344
fi
345
345
346
346
log " Compute KeSpeech fbank for train_phase1"
347
- ./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
347
+ ./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
348
348
349
349
log " Compute KeSpeech fbank for train_phase2"
350
- ./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
350
+ ./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
351
351
352
352
log " Compute KeSpeech fbank for test/dev"
353
353
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
@@ -356,19 +356,63 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
356
356
fi
357
357
fi
358
358
359
- if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
360
- log " Stage 121: tmp"
361
- log " Compute KeSpeech fbank for train_phase1"
362
- ./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --stop 1 --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
359
+ if [ $stage -le 122 ] && [ $stop_stage -ge 122 ]; then
360
+ log " Stage 122: Prepare speed perturb versionKeSpeech for whisper"
361
+ ./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
363
362
364
363
log " Compute KeSpeech fbank for train_phase2"
365
- ./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
364
+ ./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
365
+ fi
366
366
367
- log " Compute KeSpeech fbank for test/dev "
368
- ./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
367
+ if [ $stage -le 121 ] && [ $stop_stage -ge 121 ] ; then
368
+ log " Stage 121: Prepare MagicData, Primewords, ST-CMDS, THCHS-30 for whisper "
369
369
370
- touch data/fbank/.kespeech.done
370
+ if [ ! -f data/manifests/.magicdata.done ]; then
371
+ mkdir -p data/manifests
372
+ lhotse prepare magicdata $dl_dir/magicdata data/manifests/magicdata
373
+ touch data/manifests/.magicdata.done
374
+ fi
375
+
376
+ if [ ! -f data/manifests/.primewords.done ]; then
377
+ mkdir -p data/manifests
378
+ lhotse prepare primewords $dl_dir/primewords data/manifests/primewords
379
+ touch data/manifests/.primewords.done
380
+ fi
381
+ if [ ! -f data/manifests/.stcmds.done ]; then
382
+ mkdir -p data/manifests
383
+ lhotse prepare stcmds $dl_dir/stcmds data/manifests/stcmds
384
+ touch data/manifests/.stcmds.done
385
+ fi
386
+
387
+ if [ ! -f data/manifests/.thchs30.done ]; then
388
+ mkdir -p data/manifests
389
+ lhotse prepare thchs-30 $dl_dir/thchs30 data/manifests/thchs30
390
+ touch data/manifests/.thchs30.done
391
+ fi
392
+
393
+ if [ ! -f data/fbank/.thchs30.done ]; then
394
+ mkdir -p data/fbank
395
+ ./local/compute_fbank_thchs30.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
396
+ touch data/fbank/.thchs30.done
371
397
fi
398
+
399
+ if [ ! -f data/fbank/.stcmds.done ]; then
400
+ mkdir -p data/fbank
401
+ ./local/compute_fbank_stcmds.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
402
+ touch data/fbank/.stcmds.done
403
+ fi
404
+ if [ ! -f data/fbank/.magicdata.done ]; then
405
+ mkdir -p data/fbank
406
+ ./local/compute_fbank_magicdata.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
407
+ touch data/fbank/.magicdata.done
408
+ fi
409
+
410
+ if [ ! -f data/fbank/.primewords.done ]; then
411
+ mkdir -p data/fbank
412
+ ./local/compute_fbank_primewords.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
413
+ touch data/fbank/.primewords.done
414
+ fi
415
+
372
416
fi
373
417
374
418
0 commit comments