Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/app/api/tts/voices/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { NextRequest, NextResponse } from "next/server";
import { execFile } from "node:child_process";
import { promisify } from "node:util";
import { requireAuth } from "@/lib/require-auth";
import { OPENAI_TTS_VOICES, type TtsVoiceOption, type TtsProviderId } from "@/lib/tts-voices";
import { GOOGLE_REALTIME_VOICES, OPENAI_TTS_VOICES, type TtsVoiceOption, type TtsProviderId } from "@/lib/tts-voices";

export const dynamic = "force-dynamic";

Expand All @@ -19,7 +19,7 @@ export async function GET(request: NextRequest) {
const provider = normalizeProvider(request.nextUrl.searchParams.get("provider"));
const query = (request.nextUrl.searchParams.get("q") || "").trim().toLowerCase();

const providers: TtsProviderId[] = provider === "all" ? ["openai", "elevenlabs", "say", "browser"] : [provider];
const providers: TtsProviderId[] = provider === "all" ? ["openai", "google", "elevenlabs", "say", "browser"] : [provider];
const settled = await Promise.allSettled(providers.map((p) => listProviderVoices(p)));
const voices = settled.flatMap((result) => result.status === "fulfilled" ? result.value : []);
const filtered = query
Expand All @@ -34,14 +34,16 @@ export async function GET(request: NextRequest) {
}

/**
 * Coerces the raw `provider` query-string value into a provider filter.
 *
 * A recognized provider id is returned unchanged. Anything else, including
 * `null`, falls back to `"all"`. Matching is exact and case-sensitive.
 *
 * @param value Raw query parameter value, or `null`.
 * @returns The matching provider id, or `"all"` for unrecognized input.
 */
function normalizeProvider(value: string | null): ProviderFilter {
  switch (value) {
    case "openai":
    case "google":
    case "elevenlabs":
    case "say":
    case "browser":
      return value;
    default:
      return "all";
  }
}

async function listProviderVoices(provider: TtsProviderId): Promise<TtsVoiceOption[]> {
switch (provider) {
case "openai":
return OPENAI_TTS_VOICES;
case "google":
return GOOGLE_REALTIME_VOICES;
case "elevenlabs":
return listElevenLabsVoices();
case "say":
Expand Down
13 changes: 13 additions & 0 deletions src/lib/realtime-voice-client.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,19 @@ describe("realtime voice client helpers", () => {
});
});

it("maps Google voice selections to realtime session settings", () => {
  // The same model string is used for both the input and the expectation.
  const googleModel = "gemini-2.5-flash-native-audio-preview-12-2025";

  const resolved = resolveRealtimeVoiceSessionSettings({
    enabled: true,
    provider: "google",
    voiceId: "Kore",
    model: googleModel,
  });

  // The voice id is expected back with its original casing ("Kore").
  expect(resolved).toEqual({
    provider: "google",
    voice: "Kore",
    model: googleModel,
  });
});

it("does not forward non-realtime TTS voice settings", () => {
expect(resolveRealtimeVoiceSessionSettings({
enabled: true,
Expand Down
15 changes: 13 additions & 2 deletions src/lib/realtime-voice-client.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import {
GOOGLE_REALTIME_VOICE_IDS,
OPENAI_REALTIME_VOICE_IDS,
normalizeAgentVoiceSettings,
type AgentVoiceSettings,
Expand Down Expand Up @@ -90,10 +91,20 @@ export function resolveRealtimeVoiceSessionSettings(
voiceSettings?: AgentVoiceSettings | null,
): Pick<RealtimeVoiceSessionRequest, "provider" | "model" | "voice"> {
const voice = normalizeAgentVoiceSettings(voiceSettings);
if (voice.enabled === false || voice.provider !== "openai") return {};
if (voice.enabled === false) return {};

const voiceId = voice.voiceId?.trim().toLowerCase();
const rawVoiceId = voice.voiceId?.trim();
const voiceId = rawVoiceId?.toLowerCase();
const model = voice.model?.trim();
if (voice.provider === "google") {
return {
provider: "google",
voice: voiceId && GOOGLE_REALTIME_VOICE_IDS.has(voiceId) ? rawVoiceId : undefined,
model: model?.includes("native-audio") || model?.includes("live") ? model : undefined,
};
}
if (voice.provider !== "openai") return {};

return {
provider: "openai",
voice: voiceId && OPENAI_REALTIME_VOICE_IDS.has(voiceId) ? voiceId : undefined,
Expand Down
18 changes: 16 additions & 2 deletions src/lib/tts-voices.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/** Identifier of a TTS voice provider. */
export type TtsProviderId = "openai" | "google" | "elevenlabs" | "say" | "browser";

export interface TtsVoiceOption {
id: string;
Expand All @@ -22,6 +22,7 @@ export interface AgentVoiceSettings {
export const TTS_PROVIDER_OPTIONS: Array<{ value: TtsProviderId | "auto"; label: string; description: string }> = [
{ value: "auto", label: "Auto", description: "Use the best available device or configured backend voice" },
{ value: "openai", label: "OpenAI", description: "Cloud neural voices" },
{ value: "google", label: "Google", description: "Gemini realtime voices" },
{ value: "elevenlabs", label: "ElevenLabs", description: "Large voice library when configured" },
{ value: "say", label: "macOS say", description: "Local system voices" },
{ value: "browser", label: "Browser", description: "Web Speech voices on this device" },
Expand Down Expand Up @@ -55,6 +56,15 @@ export const OPENAI_REALTIME_VOICE_IDS = new Set([
"verse",
]);

/**
 * Voice options listed for the "google" provider.
 * Ids keep the mixed-case spelling shown here (e.g. "Kore").
 */
export const GOOGLE_REALTIME_VOICES: TtsVoiceOption[] = [
  {
    id: "Kore",
    name: "Kore",
    provider: "google",
    description: "Gemini realtime default",
  },
  {
    id: "Puck",
    name: "Puck",
    provider: "google",
    description: "Gemini realtime voice",
  },
];

/**
 * Lower-cased ids of every entry in `GOOGLE_REALTIME_VOICES`.
 * Membership checks must lower-case the candidate id first.
 */
export const GOOGLE_REALTIME_VOICE_IDS = new Set(
  Array.from(GOOGLE_REALTIME_VOICES, ({ id }) => id.toLowerCase()),
);

export const DEFAULT_AGENT_VOICE_SETTINGS: AgentVoiceSettings = {
enabled: true,
provider: "auto",
Expand Down Expand Up @@ -104,5 +114,9 @@ export function shouldUseDeviceTts(voice: AgentVoiceSettings) {
}

/**
 * Reports whether a voice option is a recognized realtime voice.
 *
 * A voice qualifies when its provider is "openai" or "google" and its id,
 * trimmed and lower-cased, is present in that provider's realtime id set.
 *
 * @param voice Voice option; only `provider` and `id` are consulted.
 * @returns `true` for a known OpenAI or Google realtime voice, otherwise `false`.
 */
export function isRealtimeVoiceOption(voice: Pick<TtsVoiceOption, "provider" | "id">): boolean {
  // Both id sets store lower-cased ids, so normalize once before the lookup.
  const voiceId = voice.id.trim().toLowerCase();
  switch (voice.provider) {
    case "openai":
      return OPENAI_REALTIME_VOICE_IDS.has(voiceId);
    case "google":
      return GOOGLE_REALTIME_VOICE_IDS.has(voiceId);
    default:
      return false;
  }
}
Loading