-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_cli.sh
More file actions
executable file
·88 lines (82 loc) · 3.18 KB
/
run_cli.sh
File metadata and controls
executable file
·88 lines (82 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env bash
# =============================================================================
# ASR EvalKit — CLI usage examples
#
# Entry point:
#   python -m asr_evalkit [OPTIONS]
#   asr-evalkit [OPTIONS]          (after pip install -e .)
#
# Full reference:
#   python -m asr_evalkit --help
# =============================================================================
set -euo pipefail

# Every example below writes under results/. Create it up front so the first
# run cannot fail on a missing output directory.
# NOTE(review): assumes the CLI does not create parent directories itself —
# harmless either way, since -p makes this a no-op when results/ exists.
mkdir -p results

# -----------------------------------------------------------------------------
# EXAMPLE 1 — Whisper on LibriSpeech (English, HuggingFace streaming)
# The only example that runs by default; the rest are commented templates.
# -----------------------------------------------------------------------------
python -m asr_evalkit \
  --evaluator whisper \
  --model openai/whisper-large-v3-turbo \
  --device cuda \
  --use-fp16 \
  --dataset openslr/librispeech_asr \
  --dataset-config clean \
  --dataset-split test \
  --max-samples 100 \
  --language en \
  --normalize-text \
  --remove-punctuation \
  --output-file results/whisper_librispeech.json \
  --prediction-format paired \
  --summary-file results/summary.json

# -----------------------------------------------------------------------------
# EXAMPLE 2 — Whisper on a NeMo manifest (local files)
# -----------------------------------------------------------------------------
# python -m asr_evalkit \
#   --evaluator whisper \
#   --model openai/whisper-large-v3-turbo \
#   --device cuda --use-fp16 \
#   --dataset-source nemo \
#   --dataset data/nemo_samples/manifest.jsonl \
#   --max-samples 50 \
#   --normalize-text \
#   --output-file results/whisper_nemo.json

# -----------------------------------------------------------------------------
# EXAMPLE 3 — MERaLiON multilingual (e.g. Tamil CommonVoice)
# -----------------------------------------------------------------------------
# python -m asr_evalkit \
#   --evaluator meralion \
#   --model MERaLiON/MERaLiON-2-10B-ASR \
#   --device cuda --use-fp16 \
#   --dataset knoveleng/ta \
#   --dataset-config default \
#   --dataset-split test \
#   --audio-column audio \
#   --text-column sentence \
#   --max-samples 50 \
#   --language ta \
#   --normalize-text \
#   --output-file results/meralion_ta.json

# -----------------------------------------------------------------------------
# EXAMPLE 4 — Qwen3-ASR via vLLM
# -----------------------------------------------------------------------------
# python -m asr_evalkit \
#   --evaluator qwen3_asr \
#   --model Qwen/Qwen3-ASR-1.7B \
#   --device cuda \
#   --dataset openslr/librispeech_asr \
#   --dataset-config clean \
#   --dataset-split test \
#   --max-samples 50 \
#   --normalize-text \
#   --output-file results/qwen3_asr.json

# -----------------------------------------------------------------------------
# EXAMPLE 5 — NeMo Parakeet (filepath-native, no audio loading in Python)
# -----------------------------------------------------------------------------
# python -m asr_evalkit \
#   --evaluator parakeet \
#   --model nvidia/parakeet-tdt-0.6b-v3 \
#   --device cuda \
#   --dataset-source nemo \
#   --dataset data/nemo_samples/manifest.jsonl \
#   --normalize-text \
#   --output-file results/parakeet_nemo.json