Skip to content

Commit

Permalink
Upload gigaspeech zipformer models in CI (#1412)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Dec 12, 2023
1 parent 20a82c9 commit 9e9fe79
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 11 deletions.
3 changes: 2 additions & 1 deletion .github/scripts/multi-zh-hans.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ log "----------------------------------------"
ls -lh $repo/exp

log "------------------------------------------------------------"
log "Test export streaming ONNX transducer models (Python code) "
log "Test exported streaming ONNX transducer models (Python code)"
log "------------------------------------------------------------"

log "test fp32"
Expand Down Expand Up @@ -73,6 +73,7 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url)
cp -v $repo/exp/*.onnx $dst
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs
cd $dst
Expand Down
74 changes: 69 additions & 5 deletions .github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,80 @@ git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "data/lang_bpe_500/tokens.txt"
git lfs pull --include "exp/jit_script.pt"
git lfs pull --include "exp/pretrained.pt"
ln -s pretrained.pt epoch-99.pt
ls -lh *.pt
rm epoch-30.pt
ln -s pretrained.pt epoch-30.pt
rm *.onnx
ls -lh
popd

# Export the checkpoint under $repo/exp to ONNX, run the exported models on
# three test wavs, then publish the result to Hugging Face and as a tarball.
# Relies on `log` and `$repo` being defined earlier in the script.
log "----------------------------------------"
log "Export ONNX transducer models "
log "----------------------------------------"

# Export with epoch 30 / avg 1 and model averaging switched off. The file
# names used by the tests below (e.g. encoder-epoch-30-avg-1.onnx) follow
# these two values.
./zipformer/export-onnx.py \
--tokens $repo/data/lang_bpe_500/tokens.txt \
--use-averaged-model 0 \
--epoch 30 \
--avg 1 \
--exp-dir $repo/exp

# List the experiment directory so the CI log records what is in it after the
# export.
ls -lh $repo/exp

log "------------------------------------------------------------"
log "Test exported ONNX transducer models (Python code) "
log "------------------------------------------------------------"

# First run: encoder, decoder and joiner all use the plain *.onnx files.
log "test fp32"
./zipformer/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-30-avg-1.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-30-avg-1.onnx \
--tokens $repo/data/lang_bpe_500/tokens.txt \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav

# Second run: encoder and joiner switch to the *.int8.onnx files, while the
# decoder is still the same non-int8 file as in the first run.
log "test int8"
./zipformer/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-30-avg-1.int8.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-30-avg-1.int8.onnx \
--tokens $repo/data/lang_bpe_500/tokens.txt \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav

log "Upload models to huggingface"
# The committer identity is written to the global git config, so it is not
# limited to the clone created below.
# NOTE(review): the e-mail value below looks like a redaction placeholder in
# this rendering — confirm the real address before reusing this snippet.
git config --global user.name "k2-fsa"
git config --global user.email "[email protected]"

# Clone the target model repo with the LFS smudge filter skipped, so existing
# LFS files are checked out as pointers instead of being downloaded.
url=https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-gigaspeech-2023-12-12
GIT_LFS_SKIP_SMUDGE=1 git clone $url
# $dst is the last path component of $url, used as the clone directory name.
dst=$(basename $url)
# Populate the clone: all exported ONNX files, the BPE tokens/model, and the
# test wavs.
cp -v $repo/exp/*.onnx $dst
cp -v $repo/data/lang_bpe_500/tokens.txt $dst
cp -v $repo/data/lang_bpe_500/bpe.model $dst
mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs
# From here on the working directory is the clone (until the `cd ..` below).
cd $dst
git lfs track "*.onnx"
git add .
# Push authenticates through ${HF_TOKEN}, which must be present in the
# environment. The trailing `|| true` makes this line succeed even when the
# commit or the push fails, so the upload is best-effort.
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true

log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
# Remove the git metadata from the clone before archiving, so the tarball
# contains only the model files, tokens and test wavs.
rm -rf .git
rm -fv .gitattributes
cd ..
# Old-style tar flags: create (c) a bzip2-compressed (j) archive file (f),
# verbosely (v).
tar cjfv $dst.tar.bz2 $dst
ls -lh
# Move the archive three directories up.
# NOTE(review): presumably the repository root, where a later workflow step
# picks up ./*.tar.bz2 — confirm against the script's working directory.
mv -v $dst.tar.bz2 ../../../

log "Export to torchscript model"
./zipformer/export.py \
--exp-dir $repo/exp \
--use-averaged-model false \
--tokens $repo/data/lang_bpe_500/tokens.txt \
--epoch 99 \
--epoch 30 \
--avg 1 \
--jit 1

Expand Down Expand Up @@ -67,7 +131,7 @@ echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
mkdir -p zipformer/exp
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-999.pt
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-30.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/

ls -lh data
Expand All @@ -83,7 +147,7 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" ==

./zipformer/decode.py \
--decoding-method $method \
--epoch 999 \
--epoch 30 \
--avg 1 \
--use-averaged-model 0 \
--max-duration $max_duration \
Expand Down
5 changes: 0 additions & 5 deletions .github/workflows/multi-zh-hans.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,6 @@ name: run-multi-zh-hans

on:
push:
branches:
- master
- upload-ctc-model

pull_request:
branches:
- master

Expand Down
14 changes: 14 additions & 0 deletions .github/workflows/run-gigaspeech-zipformer-2023-10-17.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ on:
push:
branches:
- master

pull_request:
types: [labeled]

Expand All @@ -33,6 +34,8 @@ on:
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"

workflow_dispatch:

concurrency:
group: run_gigaspeech_2023_10_17_zipformer-${{ github.ref }}
cancel-in-progress: true
Expand Down Expand Up @@ -85,6 +88,7 @@ jobs:
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
mkdir -p egs/gigaspeech/ASR/data
ln -sfv ~/tmp/fbank-libri egs/gigaspeech/ASR/data/fbank
Expand All @@ -97,6 +101,16 @@ jobs:
.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
- name: upload model to https://github.com/k2-fsa/sherpa-onnx
# Publishes archives found in the workspace root as release assets of another
# repository (k2-fsa/sherpa-onnx), attached to the `asr-models` tag.
uses: svenstaro/upload-release-action@v2
with:
# `file` is interpreted as a glob pattern because file_glob is true, so every
# ./*.tar.bz2 in the working directory is uploaded.
file_glob: true
file: ./*.tar.bz2
# Replace an existing release asset that has the same file name.
overwrite: true
# Target repository for the release; the token below is a dedicated secret,
# presumably one with write access to that repository — confirm.
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: asr-models

- name: Display decoding results for gigaspeech zipformer
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
shell: bash
Expand Down

0 comments on commit 9e9fe79

Please sign in to comment.