Skip to content

Commit

Permalink
Export sense-voice to torchscript (#692)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Jan 7, 2025
1 parent 60b310c commit e5b91cd
Show file tree
Hide file tree
Showing 3 changed files with 209 additions and 0 deletions.
142 changes: 142 additions & 0 deletions .github/workflows/export-sense-voice.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Runs scripts/sense-voice/run.sh, bundles the produced files together with
# a few test wavs into a tarball, and publishes the result to a GitHub
# release of k2-fsa/sherpa and to two Hugging Face repositories.
name: export-sense-voice

on:
  push:
    branches:
      - export-sense-voice
  workflow_dispatch:

concurrency:
  group: export-sense-voice-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-sense-voice:
    # Only run in the upstream organisation or the maintainer's fork.
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export sense voice
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        # Quoted so that YAML does not read the version as the float 3.1.
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        shell: bash
        run: |
          pip install torch==2.0.0 torchaudio==2.0.1 onnxruntime onnx kaldi-native-fbank funasr numpy==1.26.4
          # The Export step puts /tmp/FunASR/runtime/python/libtorch on
          # PYTHONPATH, so the checkout has to live in /tmp.
          pushd /tmp
          git clone --depth 1 https://github.com/modelscope/FunASR
          popd

      - name: Export
        shell: bash
        run: |
          pushd scripts/sense-voice
          # Prepend the FunASR libtorch runtime; keep any existing PYTHONPATH
          # and avoid a trailing ':' when it is unset or empty.
          export PYTHONPATH=/tmp/FunASR/runtime/python/libtorch${PYTHONPATH:+:$PYTHONPATH}
          ./run.sh

      - name: Collect results
        shell: bash
        run: |
          d=sherpa-sense-voice-zh-en-ja-ko-yue-2025-01-06
          mkdir $d
          mv -v scripts/sense-voice/model.pt $d/
          mv -v scripts/sense-voice/tokens.txt $d/
          mv -v scripts/sense-voice/README.md $d/
          mv -v scripts/sense-voice/bpe.model $d/
          pushd $d
          mkdir test_wavs
          cd test_wavs
          wget https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav
          wget https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ja.wav
          wget https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ko.wav
          wget https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav
          wget https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav
          popd
          ls -lh $d
          tar cjvf $d.tar.bz2 $d

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_TOKEN }}
          tag: asr-models

      # The matrix defines no `model` key, so the step names are spelled out
      # instead of interpolating `matrix.model` (which rendered as empty and
      # made the two publish steps indistinguishable in the job log).
      - name: Publish model files to huggingface
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          src=sherpa-sense-voice-zh-en-ja-ko-yue-2025-01-06
          # NOTE(review): the address below is an obfuscation placeholder in
          # the reviewed copy; restore the real committer e-mail.
          git config --global user.email "[email protected]"
          git config --global user.name "Fangjun Kuang"
          export GIT_CLONE_PROTECTION_ACTIVE=false
          export GIT_LFS_SKIP_SMUDGE=1
          rm -rf huggingface
          # NOTE(review): the token/host part of the two URLs in this step was
          # obfuscated in the reviewed copy; reconstructed from the HF_TOKEN
          # env var and the huggingface.co host -- confirm.
          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src huggingface
          rm -rf huggingface/*
          cp -av $src/* ./huggingface/
          cd huggingface
          git status
          ls -lh
          git lfs track "*.pt*"
          git add .
          # Best effort: an empty commit or a failed push must not fail the job.
          git commit -m "upload $src" || true
          git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src main || true

      - name: Publish tarball to huggingface (k2-fsa/sherpa-models)
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          src=sherpa-sense-voice-zh-en-ja-ko-yue-2025-01-06
          # NOTE(review): the address below is an obfuscation placeholder in
          # the reviewed copy; restore the real committer e-mail.
          git config --global user.email "[email protected]"
          git config --global user.name "Fangjun Kuang"
          export GIT_CLONE_PROTECTION_ACTIVE=false
          export GIT_LFS_SKIP_SMUDGE=1
          rm -rf huggingface
          # NOTE(review): the token/host part of the two URLs in this step was
          # obfuscated in the reviewed copy; reconstructed from the HF_TOKEN
          # env var and the huggingface.co host -- confirm.
          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-models huggingface
          cp -av $src.tar.bz2 ./huggingface/non-streaming-asr
          cd huggingface
          git status
          ls -lh
          git lfs track "*.tar.bz2*"
          git add .
          # Best effort: an empty commit or a failed push must not fail the job.
          git commit -m "upload $src" || true
          git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-models main || true
54 changes: 54 additions & 0 deletions scripts/sense-voice/export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env python3
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)

import numpy as np
from funasr_torch import SenseVoiceSmall


def generate_tokens(m):
    """Dump the tokenizer vocabulary to ``tokens.txt`` in the current directory.

    Each line has the form ``<piece> <id>``, with ids running from 0 up to
    ``m.tokenizer.sp.vocab_size() - 1``.

    Args:
      m: An object exposing ``tokenizer.sp`` with ``vocab_size()`` and
        ``id_to_piece(i)`` methods.
    """
    processor = m.tokenizer.sp
    with open("tokens.txt", "w", encoding="utf-8") as out:
        out.writelines(
            f"{processor.id_to_piece(idx)} {idx}\n"
            for idx in range(processor.vocab_size())
        )


def generate_bpe_model(m):
    """Write the tokenizer's serialized model to ``bpe.model``.

    The bytes returned by ``m.tokenizer.sp.serialized_model_proto()`` are
    written unchanged to ``bpe.model`` in the current directory.

    Args:
      m: An object exposing ``tokenizer.sp.serialized_model_proto()``.
    """
    with open("bpe.model", "wb") as out:
        proto_bytes = m.tokenizer.sp.serialized_model_proto()
        out.write(proto_bytes)


def main():
    """Export SenseVoiceSmall and its tokenizer files.

    Loads ``iic/SenseVoiceSmall`` on CPU, writes ``tokens.txt`` and
    ``bpe.model`` via the helper functions, then saves ``model.ort_infer``
    to ``model.pt`` with the collected metadata passed as ``_extra_files``.
    """
    model = SenseVoiceSmall("iic/SenseVoiceSmall", batch_size=1, device="cpu")

    generate_tokens(model)
    generate_bpe_model(model)

    # Keys are inserted in a fixed order so the printed dict is stable.
    metadata = {
        "model_type": "SenseVoiceSmall",
        "lfr_window_size": str(model.frontend.lfr_m),
        "lfr_window_shift": str(model.frontend.lfr_n),
    }

    # The two CMVN rows are stored as raw float32 bytes.
    for key, row in (("neg_mean", 0), ("inv_stddev", 1)):
        metadata[key] = model.frontend.cmvn[row].astype(np.float32).tobytes()

    metadata["vocab_size"] = str(model.tokenizer.get_vocab_size())
    # input should be in the range [-32768, 32767]
    metadata["normalize_samples"] = "0"
    metadata["version"] = "1"
    metadata["model_author"] = "iic"
    metadata["maintainer"] = "k2-fsa"

    # "yue" is Cantonese.
    for lang in ("auto", "zh", "en", "yue", "ja", "ko", "nospeech"):
        metadata[f"lang_{lang}"] = str(model.lid_dict[lang])

    metadata["with_itn"] = str(model.textnorm_dict["withitn"])
    metadata["without_itn"] = str(model.textnorm_dict["woitn"])
    metadata["url"] = "https://huggingface.co/FunAudioLLM/SenseVoiceSmall"

    print(metadata)
    model.ort_infer.save("model.pt", _extra_files=metadata)


if __name__ == "__main__":
    main()
13 changes: 13 additions & 0 deletions scripts/sense-voice/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash
#
# Runs export.py, lists the files it is expected to produce, and writes a
# short README.md next to them.

# Abort on the first failing command. Without this, a failed export is
# masked by the final `cat`, the script exits 0, and the caller only
# notices later when the output files are missing.
set -euo pipefail

python3 ./export.py

# Fails (and now stops the script) if any expected output is missing.
ls -lh tokens.txt model.pt bpe.model

cat >README.md << EOF
# Introduction
Models in this file are converted from
https://www.modelscope.cn/models/iic/SenseVoiceSmall/summary
EOF

0 comments on commit e5b91cd

Please sign in to comment.