Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 50 additions & 2 deletions src/app/api/tts/voices/route.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import { NextRequest, NextResponse } from "next/server";
import { execFile } from "node:child_process";
import { promisify } from "node:util";
import { and, eq } from "drizzle-orm";
import { db, withRetry } from "@/db";
import { companyRuntimes } from "@/db/schema";
import { buildRuntimeReadWhere, getAgentAccessContext } from "@/lib/agent-access";
import { getGatewayClientForRuntime } from "@/lib/gateway-chat-pool";
import { requireAuth } from "@/lib/require-auth";
import { GOOGLE_REALTIME_VOICES, OPENAI_TTS_VOICES, type TtsVoiceOption, type TtsProviderId } from "@/lib/tts-voices";

Expand All @@ -18,9 +23,10 @@ export async function GET(request: NextRequest) {

const provider = normalizeProvider(request.nextUrl.searchParams.get("provider"));
const query = (request.nextUrl.searchParams.get("q") || "").trim().toLowerCase();
const configuredRealtimeProviders = await listConfiguredRealtimeProviders();

const providers: TtsProviderId[] = provider === "all" ? ["openai", "google", "elevenlabs", "say", "browser"] : [provider];
const settled = await Promise.allSettled(providers.map((p) => listProviderVoices(p)));
const settled = await Promise.allSettled(providers.map((p) => listProviderVoices(p, configuredRealtimeProviders)));
const voices = settled.flatMap((result) => result.status === "fulfilled" ? result.value : []);
const filtered = query
? voices.filter((voice) =>
Expand All @@ -38,7 +44,18 @@ function normalizeProvider(value: string | null): ProviderFilter {
return "all";
}

async function listProviderVoices(provider: TtsProviderId): Promise<TtsVoiceOption[]> {
async function listProviderVoices(
provider: TtsProviderId,
configuredRealtimeProviders: Set<string> | null,
): Promise<TtsVoiceOption[]> {
if (
(provider === "openai" || provider === "google") &&
configuredRealtimeProviders &&
!configuredRealtimeProviders.has(provider)
) {
return [];
}

switch (provider) {
case "openai":
return OPENAI_TTS_VOICES;
Expand All @@ -53,6 +70,37 @@ async function listProviderVoices(provider: TtsProviderId): Promise<TtsVoiceOpti
}
}

/**
 * Resolves which realtime providers the primary runtime's gateway reports as configured.
 *
 * Looks up the first readable runtime flagged `isPrimary`, asks that runtime's gateway
 * for its talk catalog, and collects the normalized (trimmed, lower-cased) ids of the
 * realtime providers whose `configured` flag is exactly `true`.
 *
 * @returns The set of configured provider ids, or `null` when the answer is unknown:
 *   no database handle, no readable primary runtime, a catalog without a realtime
 *   provider list, or any failure along the way. An empty set means the catalog
 *   answered and nothing is configured.
 */
async function listConfiguredRealtimeProviders(): Promise<Set<string> | null> {
  try {
    // Capture once so the null check still holds inside the retry closure below,
    // instead of re-reading the import with a non-null assertion.
    const database = db;
    if (!database) return null;

    const access = await getAgentAccessContext();
    const readable = buildRuntimeReadWhere(access);
    if (!readable) return null;

    const [runtime] = await withRetry(() =>
      database
        .select({ id: companyRuntimes.id })
        .from(companyRuntimes)
        .where(and(readable, eq(companyRuntimes.isPrimary, true)))
        .limit(1)
    );
    if (!runtime) return null;

    const client = await getGatewayClientForRuntime(runtime.id);
    const catalog = await client.talkCatalog();
    const providers = catalog.realtime?.providers;
    if (!Array.isArray(providers)) return null;

    const configured = new Set<string>();
    for (const entry of providers) {
      // The catalog arrives over RPC, so its shape is not guaranteed at runtime.
      // Skip a malformed entry rather than letting `.trim()` throw and discard
      // every valid entry via the catch below.
      if (!entry || entry.configured !== true || typeof entry.id !== "string") continue;
      const id = entry.id.trim().toLowerCase();
      if (id) configured.add(id);
    }
    return configured;
  } catch {
    // Deliberately best-effort: a failed lookup reports "unknown" instead of
    // failing the caller's request.
    return null;
  }
}

async function listElevenLabsVoices(): Promise<TtsVoiceOption[]> {
if (!ELEVENLABS_API_KEY) return [];
const response = await fetch(`${ELEVENLABS_BASE_URL.replace(/\/$/, "")}/voices`, {
Expand Down
64 changes: 50 additions & 14 deletions src/components/voice-select-modal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ export function VoiceSelectModal({
const [provider, setProvider] = useState<ProviderFilter>("all");
const [query, setQuery] = useState("");
const [voices, setVoices] = useState<TtsVoiceOption[]>([]);
const [providerCounts, setProviderCounts] = useState<Record<string, number>>({});
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
const [favorites, setFavorites] = useState<string[]>([]);
Expand All @@ -108,16 +109,21 @@ export function VoiceSelectModal({
setError(null);
fetch(`/api/tts/voices?${params.toString()}`)
.then((response) => response.ok ? response.json() : Promise.reject(new Error(`Voice list failed: ${response.status}`)))
.then((data: { voices?: TtsVoiceOption[] }) => {
.then((data: { voices?: TtsVoiceOption[]; providers?: Record<string, number> }) => {
const serverVoices = Array.isArray(data.voices) ? data.voices : [];
const browserVoices = (provider === "all" || provider === "browser" || provider === "favorites")
? listBrowserVoices().filter((voice) => matchesQuery(voice, query))
: [];
setVoices(uniqueVoices([...serverVoices, ...browserVoices]));
const nextVoices = uniqueVoices([...serverVoices, ...browserVoices]);
setVoices(nextVoices);
if (provider === "all" || provider === "realtime" || provider === "favorites") {
setProviderCounts(mergeProviderCounts(readProviderCounts(data.providers), summarizeProviders(browserVoices)));
}
})
.catch((err) => {
setError(err instanceof Error ? err.message : "Unable to load voices");
setVoices([]);
setProviderCounts({});
})
.finally(() => setLoading(false));
}, [open, provider, query]);
Expand Down Expand Up @@ -233,18 +239,20 @@ export function VoiceSelectModal({
/>
<div className="flex flex-wrap gap-2">
{[{ value: "all", label: "All" }, { value: "realtime", label: "Realtime" }, { value: "favorites", label: "Favorites" }, ...TTS_PROVIDER_OPTIONS.filter((item) => item.value !== "auto")].map((item) => (
<button
key={item.value}
type="button"
onClick={() => setProvider(item.value as ProviderFilter)}
className={`rounded-full border px-3 py-1.5 text-xs transition-colors ${
provider === item.value
? "border-[#00f0ff]/60 bg-[#00f0ff]/15 text-[#00f0ff]"
: "border-[var(--border-subtle)] text-[var(--text-secondary)] hover:border-[var(--border-medium)] hover:text-[var(--text-primary)]"
}`}
>
{item.label}
</button>
shouldShowProviderFilter(item.value, providerCounts) ? (
<button
key={item.value}
type="button"
onClick={() => setProvider(item.value as ProviderFilter)}
className={`rounded-full border px-3 py-1.5 text-xs transition-colors ${
provider === item.value
? "border-[#00f0ff]/60 bg-[#00f0ff]/15 text-[#00f0ff]"
: "border-[var(--border-subtle)] text-[var(--text-secondary)] hover:border-[var(--border-medium)] hover:text-[var(--text-primary)]"
}`}
>
{item.label}
</button>
) : null
))}
</div>
</div>
Expand Down Expand Up @@ -341,6 +349,34 @@ export function VoiceSelectModal({
);
}

/**
 * Counts how many of the given voices belong to each provider.
 *
 * @param voices Voices to tally.
 * @returns A map from provider id to the number of voices with that provider;
 *   providers with no voices are absent.
 */
function summarizeProviders(voices: TtsVoiceOption[]) {
  const tally: Record<string, number> = {};
  for (const { provider } of voices) {
    tally[provider] = (tally[provider] || 0) + 1;
  }
  return tally;
}

/**
 * Extracts a provider-count map from an untrusted value.
 *
 * Anything that is not a non-array object yields an empty map. Within an object,
 * only entries whose value is a finite number greater than zero are kept.
 *
 * @param value Untrusted input, e.g. a field from a parsed JSON response.
 * @returns A fresh map containing only the valid, positive counts.
 */
function readProviderCounts(value: unknown) {
  const counts: Record<string, number> = {};
  if (typeof value !== "object" || value === null || Array.isArray(value)) return counts;

  Object.entries(value).forEach(([provider, raw]) => {
    if (typeof raw !== "number") return;
    if (!Number.isFinite(raw) || raw <= 0) return;
    counts[provider] = raw;
  });
  return counts;
}

/**
 * Sums any number of provider-count maps into one.
 *
 * @param items Count maps to combine; counts under the same key are added together.
 * @returns A new map with the per-provider totals. Inputs are not modified.
 */
function mergeProviderCounts(...items: Array<Record<string, number>>) {
  const totals: Record<string, number> = {};
  items.forEach((counts) => {
    Object.entries(counts).forEach(([provider, amount]) => {
      totals[provider] = (totals[provider] || 0) + amount;
    });
  });
  return totals;
}

/**
 * Decides whether a provider filter option should be shown.
 *
 * The "all", "realtime" and "favorites" options are always shown. Any other
 * option is shown only when `counts` holds a positive count for it.
 *
 * @param value Filter option value.
 * @param counts Per-provider counts, keyed by provider id.
 * @returns `true` when the option should be shown.
 */
function shouldShowProviderFilter(value: string, counts: Record<string, number>) {
  switch (value) {
    case "all":
    case "realtime":
    case "favorites":
      return true;
    default:
      return (counts[value] || 0) > 0;
  }
}

export function VoiceSummary({ value }: { value?: AgentVoiceSettings | null }) {
const settings = normalizeAgentVoiceSettings(value ?? DEFAULT_AGENT_VOICE_SETTINGS);
if (settings.enabled === false) return <span>Voice disabled</span>;
Expand Down
30 changes: 30 additions & 0 deletions src/lib/gateway-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,32 @@ export interface GatewayRealtimeClientToolCallResult {
idempotencyKey?: string;
}

/**
 * One provider entry inside a section of a {@link GatewayTalkCatalog}.
 *
 * Only `id` is required. The index signature allows entries to carry
 * additional keys beyond the ones listed here.
 */
export interface GatewayTalkCatalogProvider {
/** Provider identifier. */
id: string;
/** Optional label — presumably a human-readable display name; confirm against the gateway. */
label?: string;
/** Whether the gateway reports this provider as configured. May be absent. */
configured?: boolean;
modes?: string[];
transports?: string[];
brains?: string[];
models?: string[];
voices?: string[];
defaultModel?: string;
supportsBrowserSession?: boolean;
supportsBargeIn?: boolean;
supportsToolCalls?: boolean;
/** Any further keys are typed as `unknown` and must be narrowed before use. */
[key: string]: unknown;
}

/**
 * Catalog shape used as the result type of the `talk.catalog` gateway RPC.
 *
 * Every field is optional. The `speech`, `transcription` and `realtime`
 * sections each carry an optional list of provider entries.
 */
export interface GatewayTalkCatalog {
modes?: string[];
transports?: string[];
brains?: string[];
/** Provider entries for the speech section. */
speech?: { providers?: GatewayTalkCatalogProvider[] };
/** Provider entries for the transcription section. */
transcription?: { providers?: GatewayTalkCatalogProvider[] };
/** Provider entries for the realtime section. */
realtime?: { providers?: GatewayTalkCatalogProvider[] };
/** Any further keys are typed as `unknown` and must be narrowed before use. */
[key: string]: unknown;
}

export interface GatewayCronJob {
id: string;
agentId?: string;
Expand Down Expand Up @@ -745,6 +771,10 @@ export class GatewayClient {
return this.rpc<GatewaySecretsReloadResult>("secrets.reload", {});
}

/**
 * Calls the `talk.catalog` RPC with an empty params object.
 *
 * @returns The RPC result, typed as {@link GatewayTalkCatalog}.
 */
async talkCatalog(): Promise<GatewayTalkCatalog> {
return this.rpc<GatewayTalkCatalog>("talk.catalog", {});
}

async realtimeTalkSession(
params: GatewayRealtimeTalkSessionParams = {},
): Promise<GatewayRealtimeTalkSessionResult> {
Expand Down
Loading