Skip to content

Commit 910e5db

Browse files
committed
add manifests for whisper
1 parent be001a8 commit 910e5db

File tree

5 files changed: +115 additions, -54 deletions

egs/aishell4/ASR/local/compute_fbank_aishell4.py

+14 -6
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
from pathlib import Path
3030

3131
import torch
32-
from lhotse import ChunkedLilcomHdf5Writer, CutSet, Fbank, FbankConfig
32+
from lhotse import ChunkedLilcomHdf5Writer, CutSet, WhisperFbank, WhisperFbankConfig, Fbank, FbankConfig
3333
from lhotse.recipes.utils import read_manifests_if_cached
3434

3535
from icefall.utils import get_executor, str2bool
@@ -42,10 +42,10 @@
4242
torch.set_num_interop_threads(1)
4343

4444

45-
def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False):
45+
def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
4646
src_dir = Path("data/manifests/aishell4")
4747
output_dir = Path("data/fbank")
48-
num_jobs = min(15, os.cpu_count())
48+
num_jobs = min(8, os.cpu_count())
4949

5050
dataset_parts = (
5151
"train_S",
@@ -70,7 +70,10 @@ def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False):
7070
dataset_parts,
7171
)
7272

73-
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
73+
if whisper_fbank:
74+
extractor = WhisperFbank(WhisperFbankConfig(num_filters=num_mel_bins, device='cuda'))
75+
else:
76+
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
7477

7578
with get_executor() as ex: # Initialize the executor only once.
7679
for partition, m in manifests.items():
@@ -121,7 +124,12 @@ def get_args():
121124
default=False,
122125
help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.",
123126
)
124-
127+
parser.add_argument(
128+
"--whisper-fbank",
129+
type=str2bool,
130+
default=False,
131+
help="Use WhisperFbank instead of Fbank. Default: False.",
132+
)
125133
return parser.parse_args()
126134

127135

@@ -132,5 +140,5 @@ def get_args():
132140

133141
args = get_args()
134142
compute_fbank_aishell4(
135-
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed
143+
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank
136144
)

egs/aishell4/ASR/prepare.sh

+14 -13
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
55

66
set -eou pipefail
77

8-
stage=-1
9-
stop_stage=100
8+
stage=20
9+
stop_stage=20
1010
perturb_speed=true
1111

1212

@@ -76,14 +76,24 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
7676
fi
7777

7878
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
79-
log "Stage 2: Process aishell4"
79+
log "Stage 2: Compute fbank for aishell4"
8080
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
8181
mkdir -p data/fbank/aishell4
8282
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed}
8383
touch data/fbank/aishell4/.fbank.done
8484
fi
8585
fi
8686

87+
whisper_mel_bins=80
88+
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
89+
log "Stage 20: Compute whisper fbank for aishell4"
90+
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
91+
mkdir -p data/fbank/aishell4
92+
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
93+
touch data/fbank/aishell4/.fbank.done
94+
fi
95+
fi
96+
8797
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
8898
log "Stage 3: Prepare musan manifest"
8999
# We assume that you have downloaded the musan corpus
@@ -106,16 +116,7 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
106116
fi
107117

108118
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
109-
log "Stage 5: Compute fbank for aishell4"
110-
if [ ! -f data/fbank/.aishell4.done ]; then
111-
mkdir -p data/fbank
112-
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed}
113-
touch data/fbank/.aishell4.done
114-
fi
115-
fi
116-
117-
if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
118-
log "Stage 6: Prepare char based lang"
119+
log "Stage 5: Prepare char based lang"
119120
lang_char_dir=data/lang_char
120121
mkdir -p $lang_char_dir
121122

egs/alimeeting/ASR/local/compute_fbank_alimeeting.py

+14 -6
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
from pathlib import Path
3030

3131
import torch
32-
from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter
32+
from lhotse import CutSet, WhisperFbank, WhisperFbankConfig, Fbank, FbankConfig, LilcomChunkyWriter
3333
from lhotse.recipes.utils import read_manifests_if_cached
3434

3535
from icefall.utils import get_executor, str2bool
@@ -42,10 +42,10 @@
4242
torch.set_num_interop_threads(1)
4343

4444

45-
def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False):
45+
def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
4646
src_dir = Path("data/manifests/alimeeting")
4747
output_dir = Path("data/fbank")
48-
num_jobs = min(15, os.cpu_count())
48+
num_jobs = min(8, os.cpu_count())
4949

5050
dataset_parts = (
5151
"train",
@@ -70,7 +70,10 @@ def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False
7070
dataset_parts,
7171
)
7272

73-
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
73+
if whisper_fbank:
74+
extractor = WhisperFbank(WhisperFbankConfig(num_filters=num_mel_bins, device='cuda'))
75+
else:
76+
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
7477

7578
with get_executor() as ex: # Initialize the executor only once.
7679
for partition, m in manifests.items():
@@ -121,7 +124,12 @@ def get_args():
121124
default=False,
122125
help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.",
123126
)
124-
127+
parser.add_argument(
128+
"--whisper-fbank",
129+
type=str2bool,
130+
default=False,
131+
help="Use the Whisper Fbank feature extractor. Default: False.",
132+
)
125133
return parser.parse_args()
126134

127135

@@ -132,5 +140,5 @@ def get_args():
132140

133141
args = get_args()
134142
compute_fbank_alimeeting(
135-
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed
143+
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank
136144
)

egs/alimeeting/ASR/prepare.sh

+11 -11
Original file line number | Diff line number | Diff line change
@@ -66,13 +66,22 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
6666
fi
6767

6868
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
69-
log "Stage 2: Process alimeeting"
69+
log "Stage 2: compute fbank for alimeeting"
7070
if [ ! -f data/fbank/alimeeting/.fbank.done ]; then
7171
mkdir -p data/fbank/alimeeting
7272
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed}
7373
fi
7474
fi
7575

76+
whisper_mel_bins=80
77+
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
78+
log "Stage 20: compute whisper fbank for alimeeting"
79+
if [ ! -f data/fbank/alimeeting/.fbank.done ]; then
80+
mkdir -p data/fbank/alimeeting
81+
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
82+
fi
83+
fi
84+
7685
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
7786
log "Stage 3: Prepare musan manifest"
7887
# We assume that you have downloaded the musan corpus
@@ -95,16 +104,7 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
95104
fi
96105

97106
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
98-
log "Stage 5: Compute fbank for alimeeting"
99-
if [ ! -f data/fbank/.alimeeting.done ]; then
100-
mkdir -p data/fbank
101-
./local/compute_fbank_alimeeting.py --perturb-speed True
102-
touch data/fbank/.alimeeting.done
103-
fi
104-
fi
105-
106-
if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
107-
log "Stage 6: Prepare char based lang"
107+
log "Stage 5: Prepare char based lang"
108108
lang_char_dir=data/lang_char
109109
mkdir -p $lang_char_dir
110110

egs/multi_zh-hans/ASR/prepare.sh

+62 -18
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
6060

6161
if [ ! -f data/fbank/.thchs30.done ]; then
6262
mkdir -p data/fbank
63-
./local/compute_fbank_thchs30.py
63+
./local/compute_fbank_thchs30.py --speed-perturb true
6464
touch data/fbank/.thchs30.done
6565
fi
6666
fi
@@ -137,7 +137,7 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
137137

138138
if [ ! -f data/fbank/.stcmds.done ]; then
139139
mkdir -p data/fbank
140-
./local/compute_fbank_stcmds.py
140+
./local/compute_fbank_stcmds.py --speed-perturb true
141141
touch data/fbank/.stcmds.done
142142
fi
143143
fi
@@ -151,15 +151,15 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
151151
lhotse download primewords $dl_dir/primewords
152152
fi
153153

154-
if [ ! -f data/manifests/.stcmds.done ]; then
154+
if [ ! -f data/manifests/.primewords.done ]; then
155155
mkdir -p data/manifests
156-
lhotse prepare stcmds $dl_dir/primewords data/manifests/primewords
156+
lhotse prepare primewords $dl_dir/primewords data/manifests/primewords
157157
touch data/manifests/.primewords.done
158158
fi
159159

160160
if [ ! -f data/fbank/.primewords.done ]; then
161161
mkdir -p data/fbank
162-
./local/compute_fbank_primewords.py
162+
./local/compute_fbank_primewords.py --speed-perturb true
163163
touch data/fbank/.primewords.done
164164
fi
165165
fi
@@ -180,7 +180,7 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
180180

181181
if [ ! -f data/fbank/.magicdata.done ]; then
182182
mkdir -p data/fbank
183-
./local/compute_fbank_magicdata.py
183+
./local/compute_fbank_magicdata.py --speed-perturb true
184184
touch data/fbank/.magicdata.done
185185
fi
186186
fi
@@ -291,10 +291,10 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
291291
fi
292292

293293
log "Compute KeSpeech fbank for train_phase1"
294-
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1
294+
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1
295295

296296
log "Compute KeSpeech fbank for train_phase2"
297-
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2
297+
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2
298298

299299
log "Compute KeSpeech fbank for test/dev"
300300
./local/compute_fbank_kespeech_dev_test.py
@@ -344,10 +344,10 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
344344
fi
345345

346346
log "Compute KeSpeech fbank for train_phase1"
347-
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
347+
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
348348

349349
log "Compute KeSpeech fbank for train_phase2"
350-
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
350+
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
351351

352352
log "Compute KeSpeech fbank for test/dev"
353353
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
@@ -356,19 +356,63 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
356356
fi
357357
fi
358358

359-
if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
360-
log "Stage 121: tmp"
361-
log "Compute KeSpeech fbank for train_phase1"
362-
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --stop 1 --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
359+
if [ $stage -le 122 ] && [ $stop_stage -ge 122 ]; then
360+
log "Stage 122: Prepare speed perturb versionKeSpeech for whisper"
361+
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
363362

364363
log "Compute KeSpeech fbank for train_phase2"
365-
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
364+
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
365+
fi
366366

367-
log "Compute KeSpeech fbank for test/dev"
368-
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
367+
if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
368+
log "Stage 121: Prepare MagicData, Primewords, ST-CMDS, THCHS-30 for whisper"
369369

370-
touch data/fbank/.kespeech.done
370+
if [ ! -f data/manifests/.magicdata.done ]; then
371+
mkdir -p data/manifests
372+
lhotse prepare magicdata $dl_dir/magicdata data/manifests/magicdata
373+
touch data/manifests/.magicdata.done
374+
fi
375+
376+
if [ ! -f data/manifests/.primewords.done ]; then
377+
mkdir -p data/manifests
378+
lhotse prepare primewords $dl_dir/primewords data/manifests/primewords
379+
touch data/manifests/.primewords.done
380+
fi
381+
if [ ! -f data/manifests/.stcmds.done ]; then
382+
mkdir -p data/manifests
383+
lhotse prepare stcmds $dl_dir/stcmds data/manifests/stcmds
384+
touch data/manifests/.stcmds.done
385+
fi
386+
387+
if [ ! -f data/manifests/.thchs30.done ]; then
388+
mkdir -p data/manifests
389+
lhotse prepare thchs-30 $dl_dir/thchs30 data/manifests/thchs30
390+
touch data/manifests/.thchs30.done
391+
fi
392+
393+
if [ ! -f data/fbank/.thchs30.done ]; then
394+
mkdir -p data/fbank
395+
./local/compute_fbank_thchs30.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
396+
touch data/fbank/.thchs30.done
371397
fi
398+
399+
if [ ! -f data/fbank/.stcmds.done ]; then
400+
mkdir -p data/fbank
401+
./local/compute_fbank_stcmds.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
402+
touch data/fbank/.stcmds.done
403+
fi
404+
if [ ! -f data/fbank/.magicdata.done ]; then
405+
mkdir -p data/fbank
406+
./local/compute_fbank_magicdata.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
407+
touch data/fbank/.magicdata.done
408+
fi
409+
410+
if [ ! -f data/fbank/.primewords.done ]; then
411+
mkdir -p data/fbank
412+
./local/compute_fbank_primewords.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
413+
touch data/fbank/.primewords.done
414+
fi
415+
372416
fi
373417

374418

0 commit comments

Comments
 (0)