diff --git a/src/app/chat/page.tsx b/src/app/chat/page.tsx index 97efc60..2e04dcb 100644 --- a/src/app/chat/page.tsx +++ b/src/app/chat/page.tsx @@ -4954,6 +4954,7 @@ export default function ChatPage() { companyId={company?.id} sessionKey={activeThread.sessionKey} realtimeRuntimeId={selectedAgent?.runtimeId ?? undefined} + voiceSettings={resolvedVoiceSettings} /> ) : null} @@ -5962,6 +5963,7 @@ export default function ChatPage() { ? selectedSessionKey ?? gatewaySessionKeyForAgent(selectedAgent) : gatewaySessionKeyForAgent(selectedAgent)} realtimeRuntimeId={selectedAgent?.runtimeId ?? undefined} + voiceSettings={resolvedVoiceSettings} />
("idle"); const [isActive, setIsActive] = useState(false); @@ -466,16 +473,21 @@ export function VoiceAgent({ if (!realtimeEnabled || !realtimeRuntimeId) return false; try { + const realtimeVoiceSettings = resolveRealtimeVoiceSessionSettings(voiceSettings); const session = await startRealtimeVoiceSession({ runtimeId: realtimeRuntimeId, sessionKey, agentId: gatewayAgent ?? agent, + ...realtimeVoiceSettings, }); setRealtimeSession(session); recordVoiceBreadcrumb("realtime.session.start", { transport: session.transport, provider: session.provider, model: session.model, + requestedProvider: realtimeVoiceSettings.provider, + requestedVoice: realtimeVoiceSettings.voice, + requestedModel: realtimeVoiceSettings.model, hasRelaySessionId: Boolean(session.relaySessionId), }); publishAgentModeDiagnostic({ @@ -486,6 +498,9 @@ export function VoiceAgent({ transport: session.transport, provider: session.provider, model: session.model, + requestedProvider: realtimeVoiceSettings.provider, + requestedVoice: realtimeVoiceSettings.voice, + requestedModel: realtimeVoiceSettings.model, hasRelaySessionId: Boolean(session.relaySessionId), }, }); @@ -540,7 +555,7 @@ export function VoiceAgent({ }); return false; } - }, [agent, gatewayAgent, onRealtimeTranscript, realtimeEnabled, realtimeRuntimeId, recordVoiceBreadcrumb, requestWakeLock, sessionKey]); + }, [agent, gatewayAgent, onRealtimeTranscript, realtimeEnabled, realtimeRuntimeId, recordVoiceBreadcrumb, requestWakeLock, sessionKey, voiceSettings]); const activate = useCallback(async () => { onMicMutedChange?.(false); diff --git a/src/lib/realtime-voice-client.test.ts b/src/lib/realtime-voice-client.test.ts index 1ecda75..c7f8d95 100644 --- a/src/lib/realtime-voice-client.test.ts +++ b/src/lib/realtime-voice-client.test.ts @@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; import { cancelRealtimeRelayOutput, openRealtimeRelayEvents, + resolveRealtimeVoiceSessionSettings, sendRealtimeRelayAudio, sendRealtimeRelayToolCall, startRealtimeVoiceSession, @@ -116,4 +117,35 @@ describe("realtime voice client helpers", () => { "/api/runtimes/rt_1/talk/realtime/events?relaySessionId=relay+1", ); }); + + it("maps OpenAI voice selections to realtime session settings", () => { + expect(resolveRealtimeVoiceSessionSettings({ + enabled: true, + provider: "openai", + voiceId: "cedar", + model: "gpt-realtime-1.5", + })).toEqual({ + provider: "openai", + voice: "cedar", + model: "gpt-realtime-1.5", + }); + }); + + it("does not forward non-realtime TTS voice settings", () => { + expect(resolveRealtimeVoiceSessionSettings({ + enabled: true, + provider: "elevenlabs", + voiceId: "eleven_voice", + })).toEqual({}); + expect(resolveRealtimeVoiceSessionSettings({ + enabled: true, + provider: "openai", + voiceId: "onyx", + model: "tts-1", + })).toEqual({ + provider: "openai", + voice: undefined, + model: undefined, + }); + }); }); diff --git a/src/lib/realtime-voice-client.ts b/src/lib/realtime-voice-client.ts index e9f2715..86bff65 100644 --- a/src/lib/realtime-voice-client.ts +++ b/src/lib/realtime-voice-client.ts @@ -1,5 +1,20 @@ +import { normalizeAgentVoiceSettings, type AgentVoiceSettings } from "@/lib/tts-voices"; + export type RealtimeVoiceTransport = "webrtc-sdp" | "json-pcm-websocket" | "gateway-relay"; +const OPENAI_REALTIME_VOICE_IDS = new Set([ + "alloy", + "ash", + "ballad", + "cedar", + "coral", + "echo", + "marin", + "sage", + "shimmer", + "verse", +]); + export interface RealtimeVoiceSessionRequest { runtimeId: string; sessionKey?: string; @@ -80,6 +95,21 @@ export async function startRealtimeVoiceSession( }; } +export function resolveRealtimeVoiceSessionSettings( + voiceSettings?: AgentVoiceSettings | null, +): Pick { + const voice = normalizeAgentVoiceSettings(voiceSettings); + if (voice.enabled === false || voice.provider !== "openai") return {}; + + const voiceId = voice.voiceId?.trim().toLowerCase(); + const model = voice.model?.trim(); + return { + provider: "openai", + voice: voiceId && OPENAI_REALTIME_VOICE_IDS.has(voiceId) ? voiceId : undefined, + model: model?.includes("realtime") ? model : undefined, + }; +} + export async function sendRealtimeRelayAudio(runtimeId: string, chunk: RealtimeRelayAudioChunk): Promise { await postRealtimeRelay(runtimeId, { action: "audio",