Skip to content

Commit 4b33563

Browse files
committed
More fixes to gigaspeech recipe
1 parent 2addc6c commit 4b33563

File tree

7 files changed

+75
-887
lines changed

7 files changed

+75
-887
lines changed

egs/gigaspeech/ASR/zipformer/train.py

+19-8
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,17 @@ def get_parser():
416416
help="Accumulate stats on activations, print them and exit.",
417417
)
418418

419+
parser.add_argument(
420+
"--scan-for-oom-batches",
421+
type=str2bool,
422+
default=False,
423+
help="""
424+
Whether to scan for oom batches before training, this is helpful for
425+
finding the suitable max_duration, you only need to run it once.
426+
Caution: a little time consuming.
427+
""",
428+
)
429+
419430
parser.add_argument(
420431
"--inf-check",
421432
type=str2bool,
@@ -1197,14 +1208,14 @@ def remove_short_utt(c: Cut):
11971208
valid_cuts = valid_cuts.filter(remove_short_utt)
11981209
valid_dl = gigaspeech.valid_dataloaders(valid_cuts)
11991210

1200-
# if not params.print_diagnostics:
1201-
# scan_pessimistic_batches_for_oom(
1202-
# model=model,
1203-
# train_dl=train_dl,
1204-
# optimizer=optimizer,
1205-
# sp=sp,
1206-
# params=params,
1207-
# )
1211+
if not params.print_diagnostics and params.scan_for_oom_batches:
1212+
scan_pessimistic_batches_for_oom(
1213+
model=model,
1214+
train_dl=train_dl,
1215+
optimizer=optimizer,
1216+
sp=sp,
1217+
params=params,
1218+
)
12081219

12091220
scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0)
12101221
if checkpoints and "grad_scaler" in checkpoints:

egs/gigaspeech/KWS/zipformer/asr_datamodule.py

+1-8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Copyright 2021 Piotr Żelasko
2-
# Copyright 2023 Xiaomi Corporation (Author: Yifan Yang)
2+
# Copyright 2024 Xiaomi Corporation (Author: Wei Kang)
33
#
44
# See ../../../../LICENSE for clarification regarding multiple authors
55
#
@@ -448,13 +448,6 @@ def test_cuts(self) -> CutSet:
448448
self.args.manifest_dir / "gigaspeech_cuts_TEST.jsonl.gz"
449449
)
450450

451-
@lru_cache()
452-
def libri_100_cuts(self) -> CutSet:
453-
logging.info("About to get libri100 cuts")
454-
return load_manifest_lazy(
455-
self.args.manifest_dir / "librispeech_cuts_train-clean-100.jsonl.gz"
456-
)
457-
458451
@lru_cache()
459452
def fsc_train_cuts(self) -> CutSet:
460453
logging.info("About to get fluent speech commands train cuts")

egs/gigaspeech/KWS/zipformer/decode.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ def decode_one_batch(
274274
model=model,
275275
encoder_out=encoder_out,
276276
encoder_out_lens=encoder_out_lens,
277-
context_graph=kws_graph,
277+
keywords_graph=kws_graph,
278278
beam=params.beam,
279279
num_tailing_blanks=params.num_tailing_blanks,
280280
blank_penalty=params.blank_penalty,

egs/gigaspeech/KWS/zipformer/decode-asr.py → egs/gigaspeech/KWS/zipformer/decode_asr.py (renamed)

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
#!/usr/bin/env python3
22
#
3-
# Copyright 2021-2023 Xiaomi Corporation (Author: Fangjun Kuang,
4-
# Zengwei Yao)
3+
# Copyright 2021-2024 Xiaomi Corporation (Author: Fangjun Kuang,
4+
# Zengwei Yao,
5+
# Wei Kang)
56
#
67
# See ../../../../LICENSE for clarification regarding multiple authors
78
#

0 commit comments

Comments (0)