-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtest_english_asr.py
33 lines (25 loc) · 1.25 KB
/
test_english_asr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Microsoft ASR interface (SpeechT5)
from transformers import SpeechT5Processor, SpeechT5ForSpeechToText
from datasets import load_dataset
# Fetch the "clean" validation split of the LibriSpeech demo set and sort it
# by its "id" column before picking the first utterance.
dataset = load_dataset(
    "hf-internal-testing/librispeech_asr_demo",
    "clean",
    split="validation",
).sort("id")

# The processor needs the raw waveform together with the dataset's sampling rate.
sampling_rate = dataset.features["audio"].sampling_rate
first_audio = dataset[0]["audio"]
example_speech = first_audio["array"]

# The processor and the model are both loaded from the same checkpoint.
SPEECHT5_CHECKPOINT = "microsoft/speecht5_asr"
processor = SpeechT5Processor.from_pretrained(SPEECHT5_CHECKPOINT)
model = SpeechT5ForSpeechToText.from_pretrained(SPEECHT5_CHECKPOINT)

# Turn the waveform into PyTorch tensors ("pt") for the model.
inputs = processor(
    audio=example_speech,
    sampling_rate=sampling_rate,
    return_tensors="pt",
)

# Generate token ids (capped at 100), decode them without special tokens,
# and print the first decoded transcription.
predicted_ids = model.generate(**inputs, max_length=100)
transcription = processor.batch_decode(
    predicted_ids,
    skip_special_tokens=True,
)
print(transcription[0])
# DAMO Academy ASR (FunASR)
from funasr import AutoModel
# paraformer-zh is a multi-functional ASR model.
# The VAD / punctuation / speaker components are optional; enable only the
# ones you need. Here VAD and punctuation are configured.
paraformer_kwargs = {
    "model": "paraformer-zh",
    "model_revision": "v2.0.4",
    "vad_model": "fsmn-vad",
    "vad_model_revision": "v2.0.4",
    "punc_model": "ct-punc-c",
    "punc_model_revision": "v2.0.4",
}
model = AutoModel(**paraformer_kwargs)

# Transcribe a single local wav file, passing a hotword to the recognizer.
wav_path = "./demo/video_processed/W015_neu_1_002/W015_neu_1_002.wav"
res = model.generate(
    input=wav_path,
    batch_size_s=300,
    hotword='魔搭',
)

# Print the text of the first result entry.
first_result = res[0]
print(first_result['text'])