Skip to content

Commit

Permalink
Merge pull request #85 from intelligentnode/83-add-whisper-model
Browse files (browse the repository at this point in the history)
83 improve whisper model
  • Loading branch information
intelligentnode authored Jan 30, 2025
2 parents ddcd1e0 + 3ddf5c6 commit 670a129
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 25 deletions.
12 changes: 6 additions & 6 deletions intelli/test/integration/test_keras_whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ def test_whisper_real_audio():
)

result = wrapper.transcript(
audio_data,
sample_rate=sample_rate,
language="<|en|>",
user_prompt="You are a medical expert responsible for transcribing notes from a doctor’s speech.",
condition_on_previous_text=True
)
audio_data,
sample_rate=sample_rate,
language="<|en|>",
user_prompt="You are a medical expert responsible for transcribing notes from a doctor’s speech.",
condition_on_previous_text=True,
)
assert result is not None, "Transcription result is None."
print("Transcription output:", result)

Expand Down
14 changes: 7 additions & 7 deletions intelli/utils/whisper_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ def __init__(self, model_name="whisper_tiny_en", backbone=None):
try:
import numpy as np
import tensorflow as tf
tf.config.optimizer.set_jit(True)
import librosa
import keras_hub as hub
except ImportError as e:
Expand Down Expand Up @@ -101,7 +102,7 @@ def transcribe(
audio_data,
sample_rate=16000,
language=None,
max_steps=100,
max_steps=80,
min_chunk_sec=20,
max_chunk_sec=30,
silence_top_db=40,
Expand Down Expand Up @@ -148,7 +149,7 @@ def transcribe(
running_prompt = user_prompt or ""
results = []

for (start, end) in final_chunks:
for start, end in final_chunks:
chunk_data = audio_data[start:end]

text = self._transcribe_single_chunk(
Expand All @@ -172,12 +173,13 @@ def transcribe(

return " ".join(results).strip()


def _transcribe_single_chunk(
self,
chunk_audio_data,
sample_rate=16000,
language=None,
max_steps=100,
max_steps=80,
user_prompt=None,
):
"""
Expand Down Expand Up @@ -212,9 +214,7 @@ def _transcribe_single_chunk(

# final check - everything is an integer
if any(not isinstance(x, int) for x in start_ids):
raise ValueError(
f"start_ids contains a non-integer. start_ids={start_ids}"
)
raise ValueError(f"start_ids contains a non-integer. start_ids={start_ids}")

# convert to TF tensor
decoder_ids = self.tf.constant([start_ids], dtype=self.tf.int32)
Expand Down Expand Up @@ -244,6 +244,6 @@ def _transcribe_single_chunk(
break

# slice out generated tokens - ignore the "start_ids"
final_ids = decoder_ids[0, len(start_ids):]
final_ids = decoder_ids[0, len(start_ids) :]
text = self.tokenizer.detokenize(final_ids)
return text.replace("<|endoftext|>", "").strip()
20 changes: 9 additions & 11 deletions intelli/wrappers/keras_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def _load_model(self):
"keras_nlp is not installed or model is not supported."
) from e

if "KAGGLE_USERNAME" in self.model_params:
if self.model_params and "KAGGLE_USERNAME" in self.model_params:
os.environ["KAGGLE_USERNAME"] = self.model_params["KAGGLE_USERNAME"]
os.environ["KAGGLE_KEY"] = self.model_params["KAGGLE_KEY"]

Expand All @@ -40,7 +40,6 @@ def _load_model(self):
)
elif "whisper" in self.model_name:
try:
print("---> whisper")
backbone = self.nlp_manager.models.WhisperBackbone.from_preset(
self.model_name
)
Expand Down Expand Up @@ -129,14 +128,14 @@ def fine_tune(
self.model.fit(dataset, epochs=epochs, batch_size=batch_size)

def transcript(
self,
audio_data,
sample_rate=16000,
language=None,
user_prompt=None,
condition_on_previous_text=False,
max_steps=100,
max_chunk_sec=30,
self,
audio_data,
sample_rate=16000,
language=None,
user_prompt=None,
condition_on_previous_text=False,
max_steps=80,
max_chunk_sec=30,
):
"""
Convert speech to text using the Whisper model.
Expand All @@ -155,4 +154,3 @@ def transcript(
user_prompt=user_prompt,
condition_on_previous_text=condition_on_previous_text,
)

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="intelli",
version="0.5.0",
version="0.5.1",
author="Intellinode",
author_email="[email protected]",
description="Create your chatbot or AI agent using Intellinode. We make any model smarter.",
Expand Down

0 comments on commit 670a129

Please sign in to comment.