diff --git a/src/app/api/tts/voices/route.ts b/src/app/api/tts/voices/route.ts index e01c55d..cdefc15 100644 --- a/src/app/api/tts/voices/route.ts +++ b/src/app/api/tts/voices/route.ts @@ -1,6 +1,11 @@ import { NextRequest, NextResponse } from "next/server"; import { execFile } from "node:child_process"; import { promisify } from "node:util"; +import { and, eq } from "drizzle-orm"; +import { db, withRetry } from "@/db"; +import { companyRuntimes } from "@/db/schema"; +import { buildRuntimeReadWhere, getAgentAccessContext } from "@/lib/agent-access"; +import { getGatewayClientForRuntime } from "@/lib/gateway-chat-pool"; import { requireAuth } from "@/lib/require-auth"; import { GOOGLE_REALTIME_VOICES, OPENAI_TTS_VOICES, type TtsVoiceOption, type TtsProviderId } from "@/lib/tts-voices"; @@ -18,9 +23,10 @@ export async function GET(request: NextRequest) { const provider = normalizeProvider(request.nextUrl.searchParams.get("provider")); const query = (request.nextUrl.searchParams.get("q") || "").trim().toLowerCase(); + const configuredRealtimeProviders = await listConfiguredRealtimeProviders(); const providers: TtsProviderId[] = provider === "all" ? ["openai", "google", "elevenlabs", "say", "browser"] : [provider]; - const settled = await Promise.allSettled(providers.map((p) => listProviderVoices(p))); + const settled = await Promise.allSettled(providers.map((p) => listProviderVoices(p, configuredRealtimeProviders))); const voices = settled.flatMap((result) => result.status === "fulfilled" ? result.value : []); const filtered = query ? voices.filter((voice) => @@ -38,7 +44,18 @@ function normalizeProvider(value: string | null): ProviderFilter { return "all"; } -async function listProviderVoices(provider: TtsProviderId): Promise { +async function listProviderVoices( + provider: TtsProviderId, + configuredRealtimeProviders: Set | null, +): Promise { + if ( + (provider === "openai" || provider === "google") && + configuredRealtimeProviders && + !configuredRealtimeProviders.has(provider) + ) { + return []; + } + switch (provider) { case "openai": return OPENAI_TTS_VOICES; @@ -53,6 +70,37 @@ async function listProviderVoices(provider: TtsProviderId): Promise | null> { + try { + if (!db) return null; + const access = await getAgentAccessContext(); + const readable = buildRuntimeReadWhere(access); + if (!readable) return null; + + const [runtime] = await withRetry(() => + db! + .select({ id: companyRuntimes.id }) + .from(companyRuntimes) + .where(and(readable, eq(companyRuntimes.isPrimary, true))) + .limit(1) + ); + if (!runtime) return null; + + const catalog = await (await getGatewayClientForRuntime(runtime.id)).talkCatalog(); + const providers = catalog.realtime?.providers; + if (!Array.isArray(providers)) return null; + + return new Set( + providers + .filter((provider) => provider.configured === true) + .map((provider) => provider.id.trim().toLowerCase()) + .filter(Boolean), + ); + } catch { + return null; + } +} + async function listElevenLabsVoices(): Promise { if (!ELEVENLABS_API_KEY) return []; const response = await fetch(`${ELEVENLABS_BASE_URL.replace(/\/$/, "")}/voices`, { diff --git a/src/components/voice-select-modal.tsx b/src/components/voice-select-modal.tsx index c8ea4b1..80f700a 100644 --- a/src/components/voice-select-modal.tsx +++ b/src/components/voice-select-modal.tsx @@ -84,6 +84,7 @@ export function VoiceSelectModal({ const [provider, setProvider] = useState("all"); const [query, setQuery] = useState(""); const [voices, setVoices] = useState([]); + const [providerCounts, setProviderCounts] = useState>({}); const [loading, setLoading] = useState(false); const [error, setError] = useState(null); const [favorites, setFavorites] = useState([]); @@ -108,16 +109,21 @@ export function VoiceSelectModal({ setError(null); fetch(`/api/tts/voices?${params.toString()}`) .then((response) => response.ok ? response.json() : Promise.reject(new Error(`Voice list failed: ${response.status}`))) - .then((data: { voices?: TtsVoiceOption[] }) => { + .then((data: { voices?: TtsVoiceOption[]; providers?: Record }) => { const serverVoices = Array.isArray(data.voices) ? data.voices : []; const browserVoices = (provider === "all" || provider === "browser" || provider === "favorites") ? listBrowserVoices().filter((voice) => matchesQuery(voice, query)) : []; - setVoices(uniqueVoices([...serverVoices, ...browserVoices])); + const nextVoices = uniqueVoices([...serverVoices, ...browserVoices]); + setVoices(nextVoices); + if (provider === "all" || provider === "realtime" || provider === "favorites") { + setProviderCounts(mergeProviderCounts(readProviderCounts(data.providers), summarizeProviders(browserVoices))); + } }) .catch((err) => { setError(err instanceof Error ? err.message : "Unable to load voices"); setVoices([]); + setProviderCounts({}); }) .finally(() => setLoading(false)); }, [open, provider, query]); @@ -233,18 +239,20 @@ export function VoiceSelectModal({ />
{[{ value: "all", label: "All" }, { value: "realtime", label: "Realtime" }, { value: "favorites", label: "Favorites" }, ...TTS_PROVIDER_OPTIONS.filter((item) => item.value !== "auto")].map((item) => ( - + shouldShowProviderFilter(item.value, providerCounts) ? ( + + ) : null ))}
@@ -341,6 +349,34 @@ export function VoiceSelectModal({ ); } +function summarizeProviders(voices: TtsVoiceOption[]) { + return voices.reduce>((acc, voice) => { + acc[voice.provider] = (acc[voice.provider] || 0) + 1; + return acc; + }, {}); +} + +function readProviderCounts(value: unknown) { + if (!value || typeof value !== "object" || Array.isArray(value)) return {}; + const counts: Record = {}; + for (const [key, count] of Object.entries(value)) { + if (typeof count === "number" && Number.isFinite(count) && count > 0) counts[key] = count; + } + return counts; +} + +function mergeProviderCounts(...items: Array>) { + return items.reduce>((acc, item) => { + for (const [key, count] of Object.entries(item)) acc[key] = (acc[key] || 0) + count; + return acc; + }, {}); +} + +function shouldShowProviderFilter(value: string, counts: Record) { + if (value === "all" || value === "realtime" || value === "favorites") return true; + return (counts[value] || 0) > 0; +} + export function VoiceSummary({ value }: { value?: AgentVoiceSettings | null }) { const settings = normalizeAgentVoiceSettings(value ?? DEFAULT_AGENT_VOICE_SETTINGS); if (settings.enabled === false) return Voice disabled; diff --git a/src/lib/gateway-client.ts b/src/lib/gateway-client.ts index 06ceb13..28ea302 100644 --- a/src/lib/gateway-client.ts +++ b/src/lib/gateway-client.ts @@ -220,6 +220,32 @@ export interface GatewayRealtimeClientToolCallResult { idempotencyKey?: string; } +export interface GatewayTalkCatalogProvider { + id: string; + label?: string; + configured?: boolean; + modes?: string[]; + transports?: string[]; + brains?: string[]; + models?: string[]; + voices?: string[]; + defaultModel?: string; + supportsBrowserSession?: boolean; + supportsBargeIn?: boolean; + supportsToolCalls?: boolean; + [key: string]: unknown; +} + +export interface GatewayTalkCatalog { + modes?: string[]; + transports?: string[]; + brains?: string[]; + speech?: { providers?: GatewayTalkCatalogProvider[] }; + transcription?: { providers?: GatewayTalkCatalogProvider[] }; + realtime?: { providers?: GatewayTalkCatalogProvider[] }; + [key: string]: unknown; +} + export interface GatewayCronJob { id: string; agentId?: string; @@ -745,6 +771,10 @@ export class GatewayClient { return this.rpc("secrets.reload", {}); } + async talkCatalog(): Promise { + return this.rpc("talk.catalog", {}); + } + async realtimeTalkSession( params: GatewayRealtimeTalkSessionParams = {}, ): Promise {