Skip to content

Commit fd4ebf3

Browse files
committed
add manifest dir option
1 parent 46605ea commit fd4ebf3

File tree

5 files changed

+36
-10
lines changed

5 files changed

+36
-10
lines changed

egs/aishell/ASR/local/compute_fbank_aishell.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,13 @@
5050

5151

5252
def compute_fbank_aishell(
53-
num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False
53+
num_mel_bins: int = 80,
54+
perturb_speed: bool = False,
55+
whisper_fbank: bool = False,
56+
output_dir: str = "data/fbank",
5457
):
5558
src_dir = Path("data/manifests")
56-
output_dir = Path("data/fbank")
59+
output_dir = Path(output_dir)
5760
num_jobs = min(15, os.cpu_count())
5861

5962
dataset_parts = (
@@ -130,6 +133,12 @@ def get_args():
130133
default=False,
131134
help="Use WhisperFbank instead of Fbank. Default: False.",
132135
)
136+
parser.add_argument(
137+
"--output-dir",
138+
type=str,
139+
default="data/fbank",
140+
help="Output directory. Default: data/fbank.",
141+
)
133142
return parser.parse_args()
134143

135144

@@ -143,4 +152,5 @@ def get_args():
143152
num_mel_bins=args.num_mel_bins,
144153
perturb_speed=args.perturb_speed,
145154
whisper_fbank=args.whisper_fbank,
155+
output_dir=args.output_dir,
146156
)

egs/aishell/ASR/prepare.sh

+6-5
Original file line numberDiff line numberDiff line change
@@ -379,12 +379,13 @@ fi
379379

380380
# Whisper large-v3 uses 128 mel bins; others use 80 mel bins
381381
whisper_mel_bins=80
382+
output_dir=data/fbank_whisper
382383
if [ $stage -le 30 ] && [ $stop_stage -ge 30 ]; then
383384
log "Stage 30: Compute ${whisper_mel_bins} dim fbank for whisper model fine-tuning"
384-
if [ ! -f data/fbank/.aishell.whisper.done ]; then
385-
mkdir -p data/fbank
386-
./local/compute_fbank_aishell.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
387-
./local/compute_fbank_musan.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
388-
touch data/fbank/.aishell.whisper.done
385+
if [ ! -f $output_dir/.aishell.whisper.done ]; then
386+
mkdir -p $output_dir
387+
./local/compute_fbank_aishell.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true --output-dir $output_dir
388+
./local/compute_fbank_musan.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true --output-dir $output_dir
389+
touch $output_dir/.aishell.whisper.done
389390
fi
390391
fi

egs/aishell/ASR/whisper/decode.py

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
--exp-dir whisper/exp_large_v2 \
2929
--model-name large-v2 \
3030
--epoch 999 --avg 1 \
31+
--manifest-dir data/fbank_whisper \
3132
--beam-size 10 --max-duration 50
3233
3334
# Command for decoding using pretrained models (before fine-tuning):
@@ -36,6 +37,7 @@
3637
--exp-dir whisper/exp_large_v2 \
3738
--model-name large-v2 \
3839
--epoch -1 --avg 1 \
40+
--manifest-dir data/fbank_whisper \
3941
--remove-whisper-encoder-input-length-restriction False \
4042
--beam-size 10 --max-duration 50
4143

egs/aishell/ASR/whisper/train.py

+3
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@
2323
--max-duration 200 \
2424
--exp-dir whisper/exp_large_v2 \
2525
--model-name large-v2 \
26+
--manifest-dir data/fbank_whisper \
2627
--deepspeed \
2728
--deepspeed_config ./whisper/ds_config_zero1.json
2829
2930
# fine-tuning with ddp
3031
torchrun --nproc-per-node 8 ./whisper/train.py \
3132
--max-duration 200 \
3233
--exp-dir whisper/exp_medium \
34+
--manifest-dir data/fbank_whisper \
3335
--base-lr 1e-5 \
3436
--model-name medium
3537
"""
@@ -253,6 +255,7 @@ def get_params() -> AttributeDict:
253255
params = AttributeDict(
254256
{
255257
"frame_shift_ms": 10.0,
258+
"subsampling_factor": 2,
256259
"allowed_excess_duration_ratio": 0.1,
257260
"best_train_loss": float("inf"),
258261
"best_valid_loss": float("inf"),

egs/librispeech/ASR/local/compute_fbank_musan.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,11 @@ def is_cut_long(c: MonoCut) -> bool:
5454
return c.duration > 5
5555

5656

57-
def compute_fbank_musan(num_mel_bins: int = 80, whisper_fbank: bool = False):
57+
def compute_fbank_musan(
58+
num_mel_bins: int = 80, whisper_fbank: bool = False, output_dir: str = "data/fbank"
59+
):
5860
src_dir = Path("data/manifests")
59-
output_dir = Path("data/fbank")
61+
output_dir = Path(output_dir)
6062
num_jobs = min(15, os.cpu_count())
6163

6264
dataset_parts = (
@@ -129,6 +131,12 @@ def get_args():
129131
default=False,
130132
help="Use WhisperFbank instead of Fbank. Default: False.",
131133
)
134+
parser.add_argument(
135+
"--output-dir",
136+
type=str,
137+
default="data/fbank",
138+
help="Output directory. Default: data/fbank.",
139+
)
132140
return parser.parse_args()
133141

134142

@@ -138,5 +146,7 @@ def get_args():
138146
logging.basicConfig(format=formatter, level=logging.INFO)
139147
args = get_args()
140148
compute_fbank_musan(
141-
num_mel_bins=args.num_mel_bins, whisper_fbank=args.whisper_fbank
149+
num_mel_bins=args.num_mel_bins,
150+
whisper_fbank=args.whisper_fbank,
151+
output_dir=args.output_dir,
142152
)

0 commit comments

Comments
 (0)