Skip to content

Commit

Permalink
Commit more scripts for wenetspeech kws recipe
Browse files Browse the repository at this point in the history
  • Loading branch information
pkufool committed Feb 2, 2024
1 parent 4b33563 commit 8b65f41
Show file tree
Hide file tree
Showing 10 changed files with 2,353 additions and 147 deletions.
7 changes: 7 additions & 0 deletions egs/gigaspeech/KWS/zipformer/finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,13 @@ def get_parser():
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)

parser.add_argument(
"--bpe-model",
type=str,
default="data/lang_bpe_500/bpe.model",
help="Path to the BPE model",
)

add_training_arguments(parser)
add_model_arguments(parser)
add_finetune_arguments(parser)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -407,10 +407,3 @@ def test_net_cuts(self) -> List[CutSet]:
def test_meeting_cuts(self) -> CutSet:
    """Load the TEST_MEETING cuts manifest.

    Logs an info message, then hands
    ``self.args.manifest_dir / "cuts_TEST_MEETING.jsonl.gz"`` to
    ``load_manifest_lazy`` and returns whatever that call produces.

    The return annotation is ``CutSet`` rather than ``List[CutSet]``: the
    method returns the single result of one ``load_manifest_lazy`` call,
    which is how the other loaders in this file that make the same call
    are annotated.
    """
    logging.info("About to get TEST_MEETING cuts")
    return load_manifest_lazy(self.args.manifest_dir / "cuts_TEST_MEETING.jsonl.gz")

@lru_cache()
def test_open_commands_cuts(self) -> CutSet:
    """Load the open commands test cuts manifest.

    Logs an info message, then passes
    ``self.args.manifest_dir / "open-commands-cn_cuts_test.jsonl.gz"`` to
    ``load_manifest_lazy`` and returns its result. The method is wrapped
    in ``lru_cache()``, so a repeated call with the same ``self`` returns
    the cached result.
    """
    logging.info("About to get open commands cuts")
    manifest_path = self.args.manifest_dir / "open-commands-cn_cuts_test.jsonl.gz"
    return load_manifest_lazy(manifest_path)
1 change: 1 addition & 0 deletions egs/wenetspeech/KWS/shared
49 changes: 46 additions & 3 deletions egs/wenetspeech/KWS/zipformer/asr_datamodule.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright 2021 Piotr Żelasko
# Copyright 2024 Xiaomi Corporation (Author: Wei Kang)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
Expand Down Expand Up @@ -409,8 +410,50 @@ def test_meeting_cuts(self) -> List[CutSet]:
return load_manifest_lazy(self.args.manifest_dir / "cuts_TEST_MEETING.jsonl.gz")

@lru_cache()
def test_open_commands_cuts(self) -> CutSet:
logging.info("About to get open commands cuts")
def cn_speech_commands_small_cuts(self) -> CutSet:
logging.info("About to get cn speech commands small cuts")
return load_manifest_lazy(
self.args.manifest_dir / "open-commands-cn_cuts_test.jsonl.gz"
self.args.manifest_dir / "cn_speech_commands_cuts_small.jsonl.gz"
)

@lru_cache()
def cn_speech_commands_large_cuts(self) -> CutSet:
    """Load the "large" CN speech commands cuts manifest.

    Logs an info message, then passes
    ``self.args.manifest_dir / "cn_speech_commands_cuts_large.jsonl.gz"``
    to ``load_manifest_lazy`` and returns its result. The method is
    wrapped in ``lru_cache()``, so a repeated call with the same ``self``
    returns the cached result.
    """
    logging.info("About to get cn speech commands large cuts")
    manifest_path = self.args.manifest_dir / "cn_speech_commands_cuts_large.jsonl.gz"
    return load_manifest_lazy(manifest_path)

@lru_cache()
def nihaowenwen_dev_cuts(self) -> CutSet:
    """Load the nihaowenwen dev cuts manifest.

    Logs an info message, then passes
    ``self.args.manifest_dir / "nihaowenwen_cuts_dev.jsonl.gz"`` to
    ``load_manifest_lazy`` and returns its result. The method is wrapped
    in ``lru_cache()``, so a repeated call with the same ``self`` returns
    the cached result.
    """
    logging.info("About to get nihaowenwen dev cuts")
    manifest_path = self.args.manifest_dir / "nihaowenwen_cuts_dev.jsonl.gz"
    return load_manifest_lazy(manifest_path)

@lru_cache()
def nihaowenwen_test_cuts(self) -> CutSet:
    """Load the nihaowenwen test cuts manifest.

    Logs an info message, then passes
    ``self.args.manifest_dir / "nihaowenwen_cuts_test.jsonl.gz"`` to
    ``load_manifest_lazy`` and returns its result. The method is wrapped
    in ``lru_cache()``, so a repeated call with the same ``self`` returns
    the cached result.
    """
    logging.info("About to get nihaowenwen test cuts")
    manifest_path = self.args.manifest_dir / "nihaowenwen_cuts_test.jsonl.gz"
    return load_manifest_lazy(manifest_path)

@lru_cache()
def nihaowenwen_train_cuts(self) -> CutSet:
    """Load the nihaowenwen train cuts manifest.

    Logs an info message, then passes
    ``self.args.manifest_dir / "nihaowenwen_cuts_train.jsonl.gz"`` to
    ``load_manifest_lazy`` and returns its result. The method is wrapped
    in ``lru_cache()``, so a repeated call with the same ``self`` returns
    the cached result.
    """
    logging.info("About to get nihaowenwen train cuts")
    manifest_path = self.args.manifest_dir / "nihaowenwen_cuts_train.jsonl.gz"
    return load_manifest_lazy(manifest_path)

@lru_cache()
def xiaoyun_clean_cuts(self) -> CutSet:
    """Load the xiaoyun "clean" cuts manifest.

    Logs an info message, then passes
    ``self.args.manifest_dir / "xiaoyun_cuts_clean.jsonl.gz"`` to
    ``load_manifest_lazy`` and returns its result. The method is wrapped
    in ``lru_cache()``, so a repeated call with the same ``self`` returns
    the cached result.
    """
    logging.info("About to get xiaoyun clean cuts")
    manifest_path = self.args.manifest_dir / "xiaoyun_cuts_clean.jsonl.gz"
    return load_manifest_lazy(manifest_path)

@lru_cache()
def xiaoyun_noisy_cuts(self) -> CutSet:
    """Load the xiaoyun "noisy" cuts manifest.

    Logs an info message, then passes
    ``self.args.manifest_dir / "xiaoyun_cuts_noisy.jsonl.gz"`` to
    ``load_manifest_lazy`` and returns its result. The method is wrapped
    in ``lru_cache()``, so a repeated call with the same ``self`` returns
    the cached result.
    """
    logging.info("About to get xiaoyun noisy cuts")
    manifest_path = self.args.manifest_dir / "xiaoyun_cuts_noisy.jsonl.gz"
    return load_manifest_lazy(manifest_path)
Loading

0 comments on commit 8b65f41

Please sign in to comment.