Create model service to allow dynamic model fetching for audio to text functionality
AryanK1511 committed Dec 5, 2024
1 parent ddfdfbe commit 91921f8
Showing 3 changed files with 60 additions and 21 deletions.
1 change: 1 addition & 0 deletions src/hooks/use-file-import.tsx
@@ -145,6 +145,7 @@ async function processFile(
   file: File,
   settings: ReturnType<typeof getSettings>
 ): Promise<string | JinaAiReaderResponse | OpenAISpeechToTextResponse> {
+  console.log(file.type);
   if (file.type.startsWith("image/")) {
     return await compressImageToBase64(file, {
       compressionFactor: settings.compressionFactor,
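For context, a minimal sketch of how an audio branch in processFile could hand recordings off to the new audioToText() helper, mirroring the existing image/ branch above. The handleAudioFile wrapper and its placement are assumptions for illustration; only the console.log(file.type) line is part of this commit.

// Hypothetical helper (not in this diff): route audio uploads to audioToText(),
// the same way image/ files are routed to compressImageToBase64() above.
import { audioToText, type OpenAISpeechToTextResponse } from "../lib/ai";

async function handleAudioFile(file: File): Promise<OpenAISpeechToTextResponse> {
  if (!file.type.startsWith("audio/")) {
    throw new Error(`Expected an audio file, got ${file.type}`);
  }
  // Resolves to { text } once the current provider transcribes the file
  return audioToText(file);
}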
47 changes: 26 additions & 21 deletions src/lib/ai.ts
@@ -11,6 +11,8 @@ import {
 import { ChatCraftModel } from "./ChatCraftModel";
 import { getSettings } from "./settings";
 import { usingOfficialOpenAI } from "./providers";
+import { ModelService } from "./model-service";
+import { SpeechRecognition } from "./speech-recognition";

 export type ChatOptions = {
   model?: ChatCraftModel;
@@ -469,34 +471,37 @@ export type OpenAISpeechToTextResponse = {
 };

 /**
- * Convert an audio file to text using https://platform.openai.com/docs/api-reference/audio/createTranscription?lang=node
+ * Convert an audio file to text
  */

 export async function audioToText(file: File): Promise<OpenAISpeechToTextResponse> {
-  try {
-    const { currentProvider } = getSettings();
-    if (!currentProvider.apiKey) {
-      throw new Error("Missing OpenAI API Key");
-    }
+  const settings = getSettings();
+  const currentProvider = settings.currentProvider;

-    const { openai } = currentProvider.createClient(currentProvider.apiKey);
+  if (!currentProvider.apiKey) {
+    throw new Error("Missing API Key");
+  }

-    const response = await openai.audio.transcriptions.create({
-      file,
-      model: "whisper-1",
-    });
+  const sttClient = await ModelService.getSpeechToTextClient();

-    if (!response.text) {
-      throw new Error("Error: No transcription text returned by OpenAI.");
-    }
+  if (!sttClient) {
+    throw new Error("No STT client available");
+  }

-    const result: OpenAISpeechToTextResponse = {
-      text: response.text,
-    };
+  const sttModel = await ModelService.getSpeechToTextModel(currentProvider);

-    return result;
-  } catch (err) {
-    console.error("Error converting audio to text", err);
-    throw err;
+  if (!sttModel) {
+    throw new Error(`No speech-to-text model found for provider ${currentProvider.name}`);
   }
+
+  const recognition = new SpeechRecognition(sttModel, sttClient);
+
+  try {
+    const text = await recognition.transcribe(file);
+    return { text };
+  } catch (error) {
+    console.error("Error transcribing audio:", error);
+    throw error;
+  }
 }

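A rough usage sketch of the rewritten audioToText(); the transcribeRecording wrapper, file name, and MIME type below are assumptions, not part of this change.

// Hypothetical caller (assumed, not from this commit): wrap a recorded clip in a
// File and transcribe it; audioToText() now picks the STT client and model dynamically.
import { audioToText } from "./ai";

export async function transcribeRecording(blob: Blob): Promise<string> {
  const file = new File([blob], "recording.webm", { type: "audio/webm" });
  try {
    const { text } = await audioToText(file);
    return text;
  } catch (err) {
    // audioToText() throws if the provider has no API key, STT client, or STT model
    console.error("Transcription failed", err);
    return "";
  }
}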
33 changes: 33 additions & 0 deletions src/lib/model-service.ts
@@ -0,0 +1,33 @@
+import { ChatCraftProvider } from "./ChatCraftProvider";
+import { getSettings } from "./settings";
+import { isSpeechToTextModel } from "./ai";
+
+export class ModelService {
+  static async getSpeechToTextClient() {
+    const settings = getSettings();
+    const provider = settings.currentProvider;
+
+    if (!provider.apiKey) {
+      return null;
+    }
+
+    return provider.createClient(provider.apiKey).openai;
+  }
+
+  static async getSpeechToTextModel(provider: ChatCraftProvider): Promise<string | null> {
+    if (!provider.apiKey) {
+      return null;
+    }
+    const models: string[] = await provider.queryModels(provider.apiKey);
+    const sttModel = models.find((model) => isSpeechToTextModel(model));
+    return sttModel || null;
+  }
+
+  static async isSpeechToTextSupported(provider: ChatCraftProvider): Promise<boolean> {
+    if (!provider.apiKey) {
+      return false;
+    }
+    const models: string[] = await provider.queryModels(provider.apiKey);
+    return models.some((model) => isSpeechToTextModel(model));
+  }
+}
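One way a caller might combine the new ModelService checks with audioToText(), assuming the provider should be probed before offering transcription; the maybeTranscribe name is hypothetical and used only for illustration.

// Hypothetical consumer (not in this commit): gate transcription on provider support.
import { ModelService } from "./model-service";
import { getSettings } from "./settings";
import { audioToText } from "./ai";

export async function maybeTranscribe(file: File): Promise<string | null> {
  const { currentProvider } = getSettings();

  // isSpeechToTextSupported() returns false when the provider has no API key
  // or lists no speech-to-text model.
  if (!(await ModelService.isSpeechToTextSupported(currentProvider))) {
    return null;
  }

  const { text } = await audioToText(file);
  return text;
}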
