Export sense-voice to torchscript (#692)

k2-fsa · Jan 7, 2025 · e5b91cd · e5b91cd
1 parent 60b310c
commit e5b91cd
Show file tree

Hide file tree

Showing 3 changed files with 209 additions and 0 deletions.
diff --git a/.github/workflows/export-sense-voice.yml b/.github/workflows/export-sense-voice.yml
@@ -0,0 +1,142 @@
+# Workflow that exports the SenseVoice model and publishes the results.
+name: export-sense-voice
+
+# Triggered by pushes to the export-sense-voice branch or by a manual dispatch.
+on:
+  push:
+    branches:
+      - export-sense-voice
+  workflow_dispatch:
+
+# Runs are grouped per ref; a newer run cancels the one still in progress.
+concurrency:
+  group: export-sense-voice-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  export-sense-voice:
+    # Only run when the repository owner is k2-fsa or csukuangfj.
+    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
+    name: export sense voice
+    runs-on: ${{ matrix.os }}
+    strategy:
+      # Let the remaining matrix entries finish even if one of them fails.
+      fail-fast: false
+      # The matrix defines only `os` and `python-version`.
+      matrix:
+        os: [macos-latest]
+        python-version: ["3.10"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      # Installs the pinned Python packages and clones FunASR to /tmp/FunASR;
+      # the Export step puts its runtime/python/libtorch directory on
+      # PYTHONPATH.
+      - name: Install dependencies
+        shell: bash
+        run: |
+          pip install torch==2.0.0 torchaudio==2.0.1 onnxruntime onnx kaldi-native-fbank funasr numpy==1.26.4
+
+          git clone --depth 1 https://github.com/modelscope/FunASR /tmp/FunASR
+
+      # Runs scripts/sense-voice/run.sh from inside its own directory.
+      - name: Export
+        shell: bash
+        run: |
+          pushd scripts/sense-voice
+          # Prepend the FunASR libtorch runtime to the module search path.
+          # Any PYTHONPATH that is already set is kept; when it is unset no
+          # trailing ':' (an empty search-path entry) is added.
+          export PYTHONPATH=/tmp/FunASR/runtime/python/libtorch${PYTHONPATH:+:$PYTHONPATH}
+          ./run.sh
+
+      # Moves the exported files into a release directory, downloads the
+      # test wave files next to them and packs everything as a .tar.bz2.
+      - name: Collect results
+        shell: bash
+        run: |
+          d=sherpa-sense-voice-zh-en-ja-ko-yue-2025-01-06
+          mkdir $d
+          for f in model.pt tokens.txt README.md bpe.model; do
+            mv -v scripts/sense-voice/$f $d/
+          done
+
+          pushd $d
+          mkdir test_wavs
+          cd test_wavs
+          url=https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs
+          for lang in en ja ko yue zh; do
+            wget $url/$lang.wav
+          done
+          popd
+
+          ls -lh $d
+          tar cjvf $d.tar.bz2 $d
+
+      # Uploads every ./*.tar.bz2 archive to the `asr-models` release tag of
+      # k2-fsa/sherpa, replacing an asset of the same name if one exists.
+      - name: Release
+        uses: svenstaro/upload-release-action@v2
+        with:
+          # Treat `file` as a glob pattern rather than a single path.
+          file_glob: true
+          file: ./*.tar.bz2
+          overwrite: true
+          repo_name: k2-fsa/sherpa
+          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_TOKEN }}
+          tag: asr-models
+
+      # Pushes the unpacked model directory to the Hugging Face repository
+      # csukuangfj/<src>. The step name no longer interpolates matrix.model,
+      # which the matrix does not define.
+      - name: Publish model directory to huggingface
+        shell: bash
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          src=sherpa-sense-voice-zh-en-ja-ko-yue-2025-01-06
+
+          # NOTE(review): this address looks redacted in the listing; confirm
+          # the intended value before merging.
+          git config --global user.email "[email protected]"
+          git config --global user.name "Fangjun Kuang"
+
+          export GIT_CLONE_PROTECTION_ACTIVE=false
+
+          # Do not download LFS objects when cloning; only pointers are needed.
+          export GIT_LFS_SKIP_SMUDGE=1
+
+          rm -rf huggingface
+          # Authenticate with the token supplied through the HF_TOKEN secret.
+          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src huggingface
+
+          # Replace the previous contents (dotfiles such as .git are kept).
+          rm -rf huggingface/*
+
+          cp -av $src/* ./huggingface/
+
+          cd huggingface
+
+          git status
+          ls -lh
+          git lfs track "*.pt*"
+
+          git add .
+          # Best effort: an empty commit or a failed push does not fail the job.
+          git commit -m "upload $src" || true
+          git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src main || true
+
+      # Pushes the .tar.bz2 archive to the non-streaming-asr directory of the
+      # Hugging Face repository k2-fsa/sherpa-models. The step name no longer
+      # interpolates matrix.model, which the matrix does not define.
+      - name: Publish archive to huggingface
+        shell: bash
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          src=sherpa-sense-voice-zh-en-ja-ko-yue-2025-01-06
+
+          # NOTE(review): this address looks redacted in the listing; confirm
+          # the intended value before merging.
+          git config --global user.email "[email protected]"
+          git config --global user.name "Fangjun Kuang"
+
+          export GIT_CLONE_PROTECTION_ACTIVE=false
+
+          # Do not download LFS objects when cloning; only pointers are needed.
+          export GIT_LFS_SKIP_SMUDGE=1
+
+          rm -rf huggingface
+          # Authenticate with the token supplied through the HF_TOKEN secret.
+          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-models huggingface
+
+          # Make sure the target is a directory so that cp does not create a
+          # plain file named non-streaming-asr when it is missing.
+          mkdir -p ./huggingface/non-streaming-asr
+          cp -av $src.tar.bz2 ./huggingface/non-streaming-asr/
+
+          cd huggingface
+
+          git status
+          ls -lh
+          git lfs track "*.tar.bz2*"
+
+          git add .
+          # Best effort: an empty commit or a failed push does not fail the job.
+          git commit -m "upload $src" || true
+          git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-models main || true
+
diff --git a/scripts/sense-voice/export.py b/scripts/sense-voice/export.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)
+
+import numpy as np
+from funasr_torch import SenseVoiceSmall
+
+
+def generate_tokens(m):
+    sp = m.tokenizer.sp
+    with open("tokens.txt", "w", encoding="utf-8") as f:
+        for i in range(sp.vocab_size()):
+            f.write(f"{sp.id_to_piece(i)} {i}\n")
+
+
+def generate_bpe_model(m):
+    with open("bpe.model", "wb") as f:
+        f.write(m.tokenizer.sp.serialized_model_proto())
+
+
+def main():
+    model_dir = "iic/SenseVoiceSmall"
+    model = SenseVoiceSmall(model_dir, batch_size=1, device="cpu")
+
+    generate_tokens(model)
+    generate_bpe_model(model)
+
+    meta_data = {
+        "model_type": "SenseVoiceSmall",
+        "lfr_window_size": str(model.frontend.lfr_m),
+        "lfr_window_shift": str(model.frontend.lfr_n),
+        "neg_mean": model.frontend.cmvn[0].astype(np.float32).tobytes(),
+        "inv_stddev": model.frontend.cmvn[1].astype(np.float32).tobytes(),
+        "vocab_size": str(model.tokenizer.get_vocab_size()),
+        "normalize_samples": "0",  # input should be in the range [-32768, 32767]
+        "version": "1",
+        "model_author": "iic",
+        "maintainer": "k2-fsa",
+        "lang_auto": str(model.lid_dict["auto"]),
+        "lang_zh": str(model.lid_dict["zh"]),
+        "lang_en": str(model.lid_dict["en"]),
+        "lang_yue": str(model.lid_dict["yue"]),  # cantonese
+        "lang_ja": str(model.lid_dict["ja"]),
+        "lang_ko": str(model.lid_dict["ko"]),
+        "lang_nospeech": str(model.lid_dict["nospeech"]),
+        "with_itn": str(model.textnorm_dict["withitn"]),
+        "without_itn": str(model.textnorm_dict["woitn"]),
+        "url": "https://huggingface.co/FunAudioLLM/SenseVoiceSmall",
+    }
+    print(meta_data)
+    model.ort_infer.save("model.pt", _extra_files=meta_data)
+
+
+# Run the export only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/scripts/sense-voice/run.sh b/scripts/sense-voice/run.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+python3 ./export.py
+
+ls -lh tokens.txt model.pt bpe.model
+
+cat >README.md << EOF
+# Introduction
+
+Models in this file are converted from
+https://www.modelscope.cn/models/iic/SenseVoiceSmall/summary
+
+EOF