Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/app/chat/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4954,6 +4954,7 @@ export default function ChatPage() {
companyId={company?.id}
sessionKey={activeThread.sessionKey}
realtimeRuntimeId={selectedAgent?.runtimeId ?? undefined}
voiceSettings={resolvedVoiceSettings}
/>
</div>
) : null}
Expand Down Expand Up @@ -5962,6 +5963,7 @@ export default function ChatPage() {
? selectedSessionKey ?? gatewaySessionKeyForAgent(selectedAgent)
: gatewaySessionKeyForAgent(selectedAgent)}
realtimeRuntimeId={selectedAgent?.runtimeId ?? undefined}
voiceSettings={resolvedVoiceSettings}
/>
<div className={agentOverlayMode === "immersive"
? "absolute right-4 top-[max(var(--mobile-safe-top),1rem)] z-10 flex gap-2 sm:right-6"
Expand Down
19 changes: 17 additions & 2 deletions src/components/chat/voice-agent.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,13 @@ import {
startNativeVoiceSession,
stopNativeVoiceSession,
} from "@/lib/native-voice-session";
import { startRealtimeVoiceSession, type RealtimeVoiceSession } from "@/lib/realtime-voice-client";
import {
resolveRealtimeVoiceSessionSettings,
startRealtimeVoiceSession,
type RealtimeVoiceSession,
} from "@/lib/realtime-voice-client";
import { RealtimeGatewayRelaySession, type RealtimeVoiceStatus } from "@/lib/realtime-voice-gateway-relay";
import type { AgentVoiceSettings } from "@/lib/tts-voices";

type AgentState = "listening" | "processing" | "speaking" | "muted" | "idle";

Expand Down Expand Up @@ -54,6 +59,7 @@ interface VoiceAgentProps {
companyId?: string;
sessionKey?: string;
realtimeRuntimeId?: string;
voiceSettings?: AgentVoiceSettings | null;
}

function hexToRgb(hex: string): string {
Expand Down Expand Up @@ -118,6 +124,7 @@ export function VoiceAgent({
companyId,
sessionKey,
realtimeRuntimeId,
voiceSettings,
}: VoiceAgentProps) {
const [state, setState] = useState<AgentState>("idle");
const [isActive, setIsActive] = useState(false);
Expand Down Expand Up @@ -466,16 +473,21 @@ export function VoiceAgent({
if (!realtimeEnabled || !realtimeRuntimeId) return false;

try {
const realtimeVoiceSettings = resolveRealtimeVoiceSessionSettings(voiceSettings);
const session = await startRealtimeVoiceSession({
runtimeId: realtimeRuntimeId,
sessionKey,
agentId: gatewayAgent ?? agent,
...realtimeVoiceSettings,
});
setRealtimeSession(session);
recordVoiceBreadcrumb("realtime.session.start", {
transport: session.transport,
provider: session.provider,
model: session.model,
requestedProvider: realtimeVoiceSettings.provider,
requestedVoice: realtimeVoiceSettings.voice,
requestedModel: realtimeVoiceSettings.model,
hasRelaySessionId: Boolean(session.relaySessionId),
});
publishAgentModeDiagnostic({
Expand All @@ -486,6 +498,9 @@ export function VoiceAgent({
transport: session.transport,
provider: session.provider,
model: session.model,
requestedProvider: realtimeVoiceSettings.provider,
requestedVoice: realtimeVoiceSettings.voice,
requestedModel: realtimeVoiceSettings.model,
hasRelaySessionId: Boolean(session.relaySessionId),
},
});
Expand Down Expand Up @@ -540,7 +555,7 @@ export function VoiceAgent({
});
return false;
}
}, [agent, gatewayAgent, onRealtimeTranscript, realtimeEnabled, realtimeRuntimeId, recordVoiceBreadcrumb, requestWakeLock, sessionKey]);
}, [agent, gatewayAgent, onRealtimeTranscript, realtimeEnabled, realtimeRuntimeId, recordVoiceBreadcrumb, requestWakeLock, sessionKey, voiceSettings]);

const activate = useCallback(async () => {
onMicMutedChange?.(false);
Expand Down
32 changes: 32 additions & 0 deletions src/lib/realtime-voice-client.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
import {
cancelRealtimeRelayOutput,
openRealtimeRelayEvents,
resolveRealtimeVoiceSessionSettings,
sendRealtimeRelayAudio,
sendRealtimeRelayToolCall,
startRealtimeVoiceSession,
Expand Down Expand Up @@ -116,4 +117,35 @@ describe("realtime voice client helpers", () => {
"/api/runtimes/rt_1/talk/realtime/events?relaySessionId=relay+1",
);
});

// An enabled OpenAI selection whose voice id is in the realtime voice list and
// whose model name contains "realtime" is forwarded as provider/voice/model.
it("maps OpenAI voice selections to realtime session settings", () => {
expect(resolveRealtimeVoiceSessionSettings({
enabled: true,
provider: "openai",
voiceId: "cedar",
model: "gpt-realtime-1.5",
})).toEqual({
provider: "openai",
voice: "cedar",
model: "gpt-realtime-1.5",
});
});

// Two cases: a non-OpenAI provider resolves to an empty object, while an
// OpenAI selection with a TTS-only voice ("onyx") and model ("tts-1") keeps
// the provider but leaves voice and model undefined.
it("does not forward non-realtime TTS voice settings", () => {
expect(resolveRealtimeVoiceSessionSettings({
enabled: true,
provider: "elevenlabs",
voiceId: "eleven_voice",
})).toEqual({});
expect(resolveRealtimeVoiceSessionSettings({
enabled: true,
provider: "openai",
voiceId: "onyx",
model: "tts-1",
})).toEqual({
provider: "openai",
voice: undefined,
model: undefined,
});
});
});
30 changes: 30 additions & 0 deletions src/lib/realtime-voice-client.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
import { normalizeAgentVoiceSettings, type AgentVoiceSettings } from "@/lib/tts-voices";

export type RealtimeVoiceTransport = "webrtc-sdp" | "json-pcm-websocket" | "gateway-relay";

// Allow-list of voice ids that resolveRealtimeVoiceSessionSettings will forward
// for the "openai" provider. Entries are lowercase because the lookup is done
// on a trimmed, lowercased voice id.
// NOTE(review): presumably mirrors the voices supported by OpenAI's realtime
// API — confirm against the provider documentation when adding or removing ids.
const OPENAI_REALTIME_VOICE_IDS = new Set([
"alloy",
"ash",
"ballad",
"cedar",
"coral",
"echo",
"marin",
"sage",
"shimmer",
"verse",
]);

export interface RealtimeVoiceSessionRequest {
runtimeId: string;
sessionKey?: string;
Expand Down Expand Up @@ -80,6 +95,21 @@ export async function startRealtimeVoiceSession(
};
}

/**
 * Derives the provider/voice/model overrides for a realtime voice session
 * request from an agent's saved voice settings.
 *
 * The settings are first passed through `normalizeAgentVoiceSettings`
 * (defined in `@/lib/tts-voices`; its exact defaults are not visible here).
 *
 * @param voiceSettings - The agent's voice settings; may be omitted or null.
 * @returns An empty object when the normalized settings are explicitly
 *   disabled or use a provider other than "openai". Otherwise an object with
 *   `provider: "openai"`, plus `voice` only if the trimmed, lowercased voice id
 *   is in `OPENAI_REALTIME_VOICE_IDS`, and `model` only if the trimmed model
 *   name contains "realtime"; non-matching fields are left undefined.
 */
export function resolveRealtimeVoiceSessionSettings(
  voiceSettings?: AgentVoiceSettings | null,
): Pick<RealtimeVoiceSessionRequest, "provider" | "model" | "voice"> {
  const normalized = normalizeAgentVoiceSettings(voiceSettings);

  // Only an enabled OpenAI selection can drive a realtime session.
  if (normalized.enabled === false) return {};
  if (normalized.provider !== "openai") return {};

  const requestedVoice = normalized.voiceId?.trim().toLowerCase();
  const requestedModel = normalized.model?.trim();

  // Drop TTS-only voices that the realtime allow-list does not contain.
  let voice: string | undefined;
  if (requestedVoice && OPENAI_REALTIME_VOICE_IDS.has(requestedVoice)) {
    voice = requestedVoice;
  }

  // Drop models whose name does not mention "realtime" (e.g. plain TTS models).
  let model: string | undefined;
  if (requestedModel?.includes("realtime")) {
    model = requestedModel;
  }

  return { provider: "openai", voice, model };
}

export async function sendRealtimeRelayAudio(runtimeId: string, chunk: RealtimeRelayAudioChunk): Promise<void> {
await postRealtimeRelay(runtimeId, {
action: "audio",
Expand Down
Loading