From 497a3e78d7d6cea3d39ac022eed7f1224e235959 Mon Sep 17 00:00:00 2001 From: dev-Develope Date: Tue, 9 Jun 2026 16:33:35 +0530 Subject: [PATCH] added 60DB services --- .../src/components/icons/ProviderLogos.tsx | 27 +++ .../implementations/SixtydbProvider.ts | 207 ++++++++++++++++ frontend/src/providers/index.ts | 1 + frontend/src/providers/registry.ts | 6 + frontend/src/types/asr/common.ts | 1 + frontend/src/types/asr/vendors/sixtydb.ts | 60 +++++ server/src/index.ts | 8 + server/src/sixtydbProxy.ts | 6 + shared/sixtydbProxyCore.ts | 228 ++++++++++++++++++ 9 files changed, 544 insertions(+) create mode 100644 frontend/src/providers/implementations/SixtydbProvider.ts create mode 100644 frontend/src/types/asr/vendors/sixtydb.ts create mode 100644 server/src/sixtydbProxy.ts create mode 100644 shared/sixtydbProxyCore.ts diff --git a/frontend/src/components/icons/ProviderLogos.tsx b/frontend/src/components/icons/ProviderLogos.tsx index 1762086..da068b7 100644 --- a/frontend/src/components/icons/ProviderLogos.tsx +++ b/frontend/src/components/icons/ProviderLogos.tsx @@ -231,6 +231,32 @@ export function CloudflareLogo({ size = defaultSize, className, ...props }: Logo ) } +export function SixtydbLogo({ size = defaultSize, className, ...props }: LogoProps) { + return ( + + + + 60db + + + ) +} + export function WhisperCppLogo({ size = defaultSize, className }: LogoProps) { return ( JSX.Element> = cloudflare: CloudflareLogo, local_openai: OpenAILogo, local_whisper_cpp: WhisperCppLogo, + sixtydb: SixtydbLogo, } export function getProviderLogo(
diff --git a/frontend/src/providers/implementations/SixtydbProvider.ts b/frontend/src/providers/implementations/SixtydbProvider.ts
new file mode 100644
index 0000000..eb375c6
--- /dev/null
+++ b/frontend/src/providers/implementations/SixtydbProvider.ts
@@ -0,0 +1,279 @@
+/**
+ * 60db STT Realtime ASR Provider.
+ *
+ * Connects through the local proxy at ws://localhost:23456/ws/sixtydb
+ * (parity with ElevenLabsProvider). The proxy opens the upstream 60db
+ * connection and normalizes 60db's two-phase finals into the standard
+ * partial/final contract.
+ *
+ * The api key is handed to the proxy as a query parameter of the local
+ * WebSocket URL, so it is visible on that localhost hop; the browser never
+ * connects to 60db directly.
+ *
+ * Docs: https://docs.60db.ai/api-reference/websocket/stt
+ */
+
+import { BaseASRProvider } from '../base'
+import type {
+  ASRProviderInfo,
+  ProviderConfig,
+  ASRVendor,
+} from '../../types/asr'
+import {
+  SIXTYDB_SUPPORTED_LANGUAGES,
+} from '../../types/asr/vendors/sixtydb'
+
+const PROXY_WS_URL = 'ws://localhost:23456/ws/sixtydb'
+
+// How long disconnect() waits after sending audio_end so that trailing
+// results can still arrive before the socket is closed.
+const DISCONNECT_FLUSH_MS = 500
+
+/**
+ * Realtime ASR provider that streams audio chunks to the local 60db proxy
+ * and re-emits the proxy's `partial` / `final` / `error` messages through
+ * the BaseASRProvider emit helpers.
+ */
+export class SixtydbProvider extends BaseASRProvider {
+  readonly id: ASRVendor = 'sixtydb' as ASRVendor
+
+  readonly info: ASRProviderInfo = {
+    id: 'sixtydb' as ASRVendor,
+    name: '60db',
+    description:
+      '60db real-time speech-to-text. ~40 languages including Indic + English code-switching, sentence-based continuous mode, optional speaker diarization.',
+    type: 'cloud',
+    supportsStreaming: true,
+    capabilities: {
+      audioInputMode: 'pcm16',
+      audioProfile: {
+        payloadFormat: 'pcm16',
+        sampleRateHz: 16000,
+        channels: 1,
+        preferredChunkMs: 100,
+      },
+      transport: {
+        type: 'realtime',
+        captureRestartStrategy: 'reuse-session',
+      },
+      prompting: {
+        supportsLanguageHints: true,
+      },
+      workloads: {
+        liveCapture: {
+          availability: 'implemented',
+          executionMode: 'realtime-stream',
+          inputSources: ['system-audio'],
+          acceptedFileKinds: ['audio'],
+        },
+        fileTranscription: {
+          availability: 'compatible',
+          executionMode: 'single-request',
+          inputSources: ['file'],
+          acceptedFileKinds: ['audio', 'video'],
+        },
+      },
+      supportsConfigTest: true,
+    },
+    requiredConfigKeys: ['apiKey'],
+    supportedLanguages: [...SIXTYDB_SUPPORTED_LANGUAGES],
+    website: 'https://60db.ai',
+    docsUrl: 'https://docs.60db.ai/api-reference/websocket/stt',
+    configFields: [
+      {
+        key: 'apiKey',
+        label: 'API Key',
+        type: 'password',
+        required: true,
+        placeholder: 'sk_live_...',
+        description: 'Get your 60db API key from docs.60db.ai',
+      },
+      {
+        key: 'languageHints',
+        label: 'Language Hints',
+        type: 'text',
+        required: false,
+        placeholder: 'en, hi',
+        description: 'Comma-separated ISO 639-1 codes (max 5). Omit for auto-detect.',
+      },
+    ],
+  }
+
+  private ws: WebSocket | null = null
+  private wsReady = false
+
+  /**
+   * Open the proxy WebSocket and wait for the 60db session to become ready.
+   *
+   * Resolves when the proxy sends `ready`. Rejects when the socket errors,
+   * the proxy reports an error, or the socket closes before `ready` arrives.
+   * A missing api key emits MISSING_API_KEY and resolves without connecting.
+   *
+   * NOTE(review): only `config.language` is forwarded; the `languageHints`
+   * config field declared in `info` is not read here — confirm intended.
+   */
+  async connect(config: ProviderConfig): Promise<void> {
+    const apiKey = config.apiKey as string
+
+    if (!apiKey) {
+      this.emitError(this.createError('MISSING_API_KEY', '60db API key is required'))
+      return
+    }
+
+    this._config = config
+    this.setState('connecting')
+
+    return new Promise<void>((resolve, reject) => {
+      // Settle the promise exactly once so a socket that dies before `ready`
+      // rejects instead of leaving the caller waiting forever.
+      let settled = false
+      const settle = (error?: Error): void => {
+        if (settled) return
+        settled = true
+        if (error) reject(error)
+        else resolve()
+      }
+
+      try {
+        const params = new URLSearchParams({
+          apiKey,
+          language: (config.language as string) || '',
+        })
+
+        const proxyUrl = `${PROXY_WS_URL}?${params.toString()}`
+        console.log('[SixtydbProvider] connecting to proxy...')
+
+        const socket = new WebSocket(proxyUrl)
+        this.ws = socket
+
+        socket.onopen = () => {
+          console.log('[SixtydbProvider] proxy connected, awaiting 60db session_started...')
+        }
+
+        socket.onmessage = (event) => {
+          // Ignore late messages from a socket that was replaced or disconnected.
+          if (this.ws !== socket) return
+
+          try {
+            const msg = JSON.parse(event.data)
+
+            switch (msg.type) {
+              case 'ready':
+                console.log('[SixtydbProvider] 60db session ready')
+                this.wsReady = true
+                this.setState('connected')
+                settle()
+                break
+
+              case 'partial':
+                if (msg.text) {
+                  console.log('[SixtydbProvider] partial:', msg.text.substring(0, 50))
+                  this.emitPartial(msg.text)
+                }
+                break
+
+              case 'final':
+                console.log('[SixtydbProvider] final:', msg.text)
+                this.emitFinal(msg.text || '')
+                // NOTE(review): emitFinished() runs after every final — confirm
+                // this is intended for continuous (multi-sentence) sessions.
+                this.emitFinished()
+                break
+
+              case 'error': {
+                const message = msg.message || 'Server error'
+                console.error('[SixtydbProvider] server error:', msg.message)
+                this.emitError(this.createError('SERVER_ERROR', message))
+                // No-op once connected; fails connect() if still handshaking.
+                settle(new Error(message))
+                break
+              }
+            }
+          } catch (e) {
+            console.error('[SixtydbProvider] failed to parse message:', e)
+          }
+        }
+
+        socket.onerror = (error) => {
+          console.error('[SixtydbProvider] WebSocket error:', error)
+          this.emitError(this.createError('WEBSOCKET_ERROR', 'WebSocket connection error — make sure the local proxy is running'))
+          settle(new Error('WebSocket connection error'))
+        }
+
+        socket.onclose = (event) => {
+          console.log('[SixtydbProvider] WebSocket closed:', event.code, event.reason)
+          settle(new Error(`WebSocket closed before the 60db session was ready (code ${event.code})`))
+
+          // A newer connect() or a disconnect() already owns the shared state.
+          if (this.ws !== socket) return
+
+          this.ws = null
+          this.wsReady = false
+          this.setState('idle')
+        }
+      } catch (error) {
+        console.error('[SixtydbProvider] connect failed:', error)
+        this.emitError(this.createError('CONNECTION_ERROR', 'Connection failed'))
+        settle(error instanceof Error ? error : new Error(String(error)))
+      }
+    })
+  }
+
+  /**
+   * Ask the proxy to finish the session, wait briefly for trailing results,
+   * then close the socket and return to `idle`.
+   */
+  async disconnect(): Promise<void> {
+    console.log('[SixtydbProvider] disconnecting...')
+
+    const socket = this.ws
+
+    if (socket && this.wsReady && socket.readyState === WebSocket.OPEN) {
+      socket.send(JSON.stringify({ type: 'audio_end' }))
+      // Stop forwarding audio as soon as audio_end has been sent.
+      this.wsReady = false
+      await new Promise<void>(resolve => setTimeout(resolve, DISCONNECT_FLUSH_MS))
+    }
+
+    if (socket) {
+      socket.close(1000, 'disconnect')
+    }
+
+    // Only reset shared state if no newer connection replaced this socket
+    // while we were waiting.
+    if (this.ws === socket) {
+      this.ws = null
+      this.wsReady = false
+      this.setState('idle')
+    }
+  }
+
+  /**
+   * Forward one audio chunk to the proxy as a binary frame. Chunks are
+   * dropped (with a warning) until the session is ready.
+   */
+  sendAudio(data: Blob | ArrayBuffer): void {
+    const socket = this.ws
+    if (!socket || !this.wsReady) {
+      console.warn('[SixtydbProvider] WebSocket not ready, dropping audio')
+      return
+    }
+
+    this.setState('recording')
+
+    if (data instanceof Blob) {
+      data.arrayBuffer()
+        .then(buffer => {
+          // The socket may have closed or been replaced while the Blob was read.
+          if (this.ws === socket && socket.readyState === WebSocket.OPEN) {
+            socket.send(buffer)
+          }
+        })
+        .catch(error => {
+          console.error('[SixtydbProvider] failed to read audio blob:', error)
+        })
+    } else {
+      socket.send(data)
+    }
+  }
+}
diff --git a/frontend/src/providers/index.ts b/frontend/src/providers/index.ts index 427fc54..4038c05 100644 --- a/frontend/src/providers/index.ts +++ b/frontend/src/providers/index.ts @@ -19,3 +19,4 @@ export { AssemblyAIProvider } from './implementations/AssemblyAIProvider' export { ElevenLabsProvider } from './implementations/ElevenLabsProvider' export { LocalOpenAIProvider } from './implementations/LocalOpenAIProvider' export { WhisperCppRuntimeProvider } from './implementations/WhisperCppRuntimeProvider' +export { SixtydbProvider } from './implementations/SixtydbProvider' diff --git a/frontend/src/providers/registry.ts b/frontend/src/providers/registry.ts index c83ed83..314bd36 100644 --- a/frontend/src/providers/registry.ts +++ b/frontend/src/providers/registry.ts @@ -21,6 +21,7 @@ import { GladiaProvider } from './implementations/GladiaProvider' import { CloudflareProvider } from './implementations/CloudflareProvider' import { LocalOpenAIProvider } from
'./implementations/LocalOpenAIProvider' import { WhisperCppRuntimeProvider } from './implementations/WhisperCppRuntimeProvider' +import { SixtydbProvider } from './implementations/SixtydbProvider' // Provider 注册表 class ProviderRegistry { @@ -131,6 +132,11 @@ function registerDefaultProviders(): void { info: new WhisperCppRuntimeProvider().info, create: () => new WhisperCppRuntimeProvider(), }) + + providerRegistry.register({ + info: new SixtydbProvider().info, + create: () => new SixtydbProvider(), + }) } // 初始化注册 diff --git a/frontend/src/types/asr/common.ts b/frontend/src/types/asr/common.ts index 1443986..a4dbf71 100644 --- a/frontend/src/types/asr/common.ts +++ b/frontend/src/types/asr/common.ts @@ -11,6 +11,7 @@ export enum ASRVendor { Cloudflare = 'cloudflare', LocalOpenAI = 'local_openai', LocalWhisperCpp = 'local_whisper_cpp', + Sixtydb = 'sixtydb', } export type ProviderType = 'cloud' | 'local'
diff --git a/frontend/src/types/asr/vendors/sixtydb.ts b/frontend/src/types/asr/vendors/sixtydb.ts
new file mode 100644
index 0000000..5547b60
--- /dev/null
+++ b/frontend/src/types/asr/vendors/sixtydb.ts
@@ -0,0 +1,60 @@
+/**
+ * 60db STT Realtime ASR vendor-specific types.
+ *
+ * Docs: https://docs.60db.ai/api-reference/websocket/stt
+ */
+
+export const SIXTYDB_DEFAULT_MODEL = '60db-stt-v01' // NOTE(review): not referenced by the provider or proxy in this patch
+
+// Language codes offered for 60db. "multi" is not a language code: it is a
+// placeholder for the auto-detect/multi-language session feature (up to 5
+// languages per session) — when languages is omitted in the start message,
+// 60db auto-detects.
+export const SIXTYDB_SUPPORTED_LANGUAGES = [
+  'en', 'es', 'fr', 'de', 'it', 'pt', 'nl', 'pl', 'ru', 'uk',
+  'cs', 'sv', 'ar',
+  'hi', 'bn', 'mr', 'pa', 'gu', 'ta', 'te', 'kn', 'ml', 'or',
+  'as', 'ne', 'sa',
+  'multi',
+] as const
+
+export type SixtydbSupportedLanguage = typeof SIXTYDB_SUPPORTED_LANGUAGES[number]
+
+// Server-emitted transcription event (minimal shape that the proxy parses).
+// The proxy forwards events with speech_final=true as 'final' and every other
+// non-empty transcription as 'partial', so the provider doesn't need to see
+// this shape directly — the type is kept here for documentation.
+export interface SixtydbTranscriptionEvent {
+  type: 'transcription'
+  text: string // events with empty text are dropped by the proxy
+  confidence?: number
+  language?: string
+  is_final?: boolean // per the proxy comments: set on the fast first emit
+  speech_final?: boolean // true → the proxy forwards the event as 'final'
+  sentence_id?: number
+  words?: Array<{
+    word: string
+    start: number
+    end: number
+    confidence?: number
+  }>
+  speakers?: Array<{ speaker: string; start: number; end: number }>
+}
+
+export interface SixtydbSessionStartedEvent {
+  type: 'session_started' // the proxy answers this with { type: 'ready' } to the client
+  session_id: string
+  language?: string
+  model?: string
+}
+
+export interface SixtydbErrorEvent {
+  type: 'error'
+  error: string // forwarded to the client together with error_code
+  error_code?: string
+}
+
+export type SixtydbServerEvent =
+  | SixtydbTranscriptionEvent
+  | SixtydbSessionStartedEvent
+  | SixtydbErrorEvent
diff --git a/server/src/index.ts b/server/src/index.ts index 7294e5b..4723926 100644 --- a/server/src/index.ts +++ b/server/src/index.ts @@ -10,6 +10,7 @@ import { createDeepgramProxyServer } from './deepgramProxy.js' import { createAssemblyAIProxyServer } from './assemblyaiProxy.js' import { createElevenLabsProxyServer } from './elevenlabsProxy.js' import { createGladiaProxyServer } from './gladiaProxy.js' +import { createSixtydbProxyServer } from './sixtydbProxy.js' const __filename = fileURLToPath(import.meta.url) const __dirname = path.dirname(__filename) @@ -37,6 +38,9 @@ createElevenLabsProxyServer(elevenlabsWss) const gladiaWss = new WebSocketServer({ noServer: true }) createGladiaProxyServer(gladiaWss) +const sixtydbWss = new WebSocketServer({ noServer: true }) +createSixtydbProxyServer(sixtydbWss) + server.on('upgrade', (request, socket, head) => { const { pathname } = new URL(request.url || '', `http://${request.headers.host}`) @@ -64,6 +68,10 @@ server.on('upgrade', (request, socket, head) => { gladiaWss.handleUpgrade(request,
socket, head, (ws) => { gladiaWss.emit('connection', ws, request) }) + } else if (pathname === '/ws/sixtydb') { + sixtydbWss.handleUpgrade(request, socket, head, (ws) => { + sixtydbWss.emit('connection', ws, request) + }) } else { socket.destroy() } diff --git a/server/src/sixtydbProxy.ts b/server/src/sixtydbProxy.ts new file mode 100644 index 0000000..1d2842c --- /dev/null +++ b/server/src/sixtydbProxy.ts @@ -0,0 +1,6 @@ +import type { WebSocketServer } from 'ws' +import { attachSixtydbProxyServer } from '../../shared/sixtydbProxyCore.js' + +export function createSixtydbProxyServer(wss: WebSocketServer): void { + attachSixtydbProxyServer(wss) +}
diff --git a/shared/sixtydbProxyCore.ts b/shared/sixtydbProxyCore.ts
new file mode 100644
index 0000000..8f08d0a
--- /dev/null
+++ b/shared/sixtydbProxyCore.ts
@@ -0,0 +1,298 @@
+/**
+ * 60db STT Realtime ASR proxy core.
+ *
+ * Architectural parity with elevenlabsProxyCore.ts. 60db authenticates via
+ * `?apiKey=` in the URL — the proxy is largely a passthrough but still gives:
+ *   - the browser never connects to 60db directly; it hands the api key to
+ *     this proxy as a query parameter of the local WebSocket URL
+ *   - centralized logging
+ *   - consistent cancellation + error handling with the other 12 providers
+ *
+ * 60db STT WS protocol (wss://api.60db.ai/ws/stt):
+ *   server → client: connection_established → connected → session_started →
+ *                    speech_started → transcription (interim/final) → session_stopped
+ *   client → server: { type: "start", languages, config: { encoding, sample_rate, ... } }
+ *                    raw binary PCM16 frames (no JSON wrapping needed)
+ *                    { type: "stop" }
+ */
+
+import type { IncomingMessage } from 'http'
+import { URL } from 'url'
+import { WebSocket as NodeWebSocket, type WebSocketServer } from 'ws'
+import { getWsProxyAgent } from './proxyAgent'
+
+const SIXTYDB_WS_BASE = 'wss://api.60db.ai/ws/stt'
+
+// Cap on languages per session (the provider's config field documents "max 5").
+const MAX_SESSION_LANGUAGES = 5
+
+// UI placeholder meaning "auto-detect"; it is never sent upstream as a code.
+const AUTO_DETECT_PLACEHOLDER = 'multi'
+
+interface SixtydbProxyConfig {
+  apiKey: string
+  language?: string
+  // Normalized list for the start message; null means auto-detect.
+  languages: string[] | null
+  diarize?: boolean
+}
+
+/**
+ * Turn the `language` query value into the `languages` list of the start
+ * message. Accepts one code or a comma-separated list; blanks, duplicates and
+ * the "multi" placeholder are dropped and the list is capped at
+ * MAX_SESSION_LANGUAGES. Returns null (auto-detect) when nothing is left.
+ */
+function parseLanguages(raw: string): string[] | null {
+  const codes = raw
+    .split(',')
+    .map(code => code.trim())
+    .filter(code => code !== '' && code !== AUTO_DETECT_PLACEHOLDER)
+  const unique = Array.from(new Set(codes)).slice(0, MAX_SESSION_LANGUAGES)
+  return unique.length > 0 ? unique : null
+}
+
+/** Read apiKey / language / diarize from the client's upgrade request URL. */
+function parseProxyConfig(req: IncomingMessage): SixtydbProxyConfig {
+  const url = new URL(req.url || '', `http://${req.headers.host}`)
+  const language = url.searchParams.get('language') || ''
+  return {
+    apiKey: url.searchParams.get('apiKey') || '',
+    language,
+    languages: parseLanguages(language),
+    diarize: url.searchParams.get('diarize') === 'true',
+  }
+}
+
+/**
+ * Parse a client frame as a JSON control message. Returns null when the
+ * frame is not a JSON object, in which case it is treated as audio.
+ */
+function parseControlMessage(data: Buffer): { type?: unknown } | null {
+  const text = data.toString('utf-8')
+  if (!text.startsWith('{')) return null
+  try {
+    const parsed: unknown = JSON.parse(text)
+    if (typeof parsed !== 'object' || parsed === null) return null
+    return parsed as { type?: unknown }
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Bridge one browser WebSocket to a fresh upstream 60db session.
+ *
+ * Upstream JSON events are translated into the client contract
+ * (`ready` / `partial` / `final` / `error`); client audio frames are passed
+ * through once the session is ready, and `audio_end` / `terminate` control
+ * messages become an upstream `stop`.
+ */
+function handleSixtydbConnection(clientWs: NodeWebSocket, req: IncomingMessage): void {
+  console.log('[SixtydbProxy] new client connection')
+
+  const config = parseProxyConfig(req)
+  if (!config.apiKey) {
+    console.error('[SixtydbProxy] missing apiKey')
+    clientWs.close(4001, 'Missing apiKey')
+    return
+  }
+
+  const wsUrl = `${SIXTYDB_WS_BASE}?apiKey=${encodeURIComponent(config.apiKey)}`
+  const languageLabel = config.languages ? config.languages.join(',') : 'auto'
+  console.log(`[SixtydbProxy] dialing upstream: language=${languageLabel} diarize=${config.diarize}`)
+
+  const agent = getWsProxyAgent()
+  const upstream = new NodeWebSocket(wsUrl, { agent })
+
+  let sessionReady = false
+  let clientClosed = false
+
+  // Send a JSON message to the browser only while its socket is still open.
+  const sendToClient = (payload: Record<string, unknown>): void => {
+    if (clientClosed || clientWs.readyState !== NodeWebSocket.OPEN) return
+    clientWs.send(JSON.stringify(payload))
+  }
+
+  // Tear down the upstream socket whatever state it is in, so a client that
+  // leaves during the handshake does not leave a 60db connection dangling.
+  const closeUpstream = (reason: string): void => {
+    if (upstream.readyState === NodeWebSocket.OPEN) {
+      try { upstream.send(JSON.stringify({ type: 'stop' })) } catch { /* ignore */ }
+      upstream.close(1000, reason)
+    } else if (upstream.readyState === NodeWebSocket.CONNECTING) {
+      upstream.terminate()
+    }
+  }
+
+  upstream.on('open', () => {
+    console.log('[SixtydbProxy] upstream connected, waiting for session_started...')
+  })
+
+  upstream.on('message', (data: Buffer) => {
+    if (clientClosed) return
+
+    let msg: Record<string, unknown>
+    try {
+      const parsed: unknown = JSON.parse(data.toString())
+      if (typeof parsed !== 'object' || parsed === null) {
+        console.error('[SixtydbProxy] ignoring non-object upstream message')
+        return
+      }
+      msg = parsed as Record<string, unknown>
+    } catch (err) {
+      console.error('[SixtydbProxy] failed to parse upstream message:', err)
+      return
+    }
+
+    // Handshake: connection_established (outer-key) → send start.
+    if (msg.connection_established) {
+      const startMsg = {
+        type: 'start',
+        languages: config.languages,
+        config: {
+          encoding: 'linear',
+          sample_rate: 16000,
+          continuous_mode: true,
+          utterance_end_ms: 500,
+          interim_results_frequency: 300,
+          diarize: !!config.diarize,
+          audio_enhancement: 'adaptive',
+        },
+      }
+      upstream.send(JSON.stringify(startMsg))
+      return
+    }
+
+    if (msg.type === 'connected') {
+      // Proxy ready notice; we wait for session_started before announcing to client.
+      return
+    }
+
+    if (msg.type === 'session_started') {
+      console.log(`[SixtydbProxy] session_started: id=${(msg as { session_id?: string }).session_id}`)
+      sessionReady = true
+      sendToClient({ type: 'ready' })
+      return
+    }
+
+    if (msg.type === 'transcription') {
+      const text = (msg as { text?: string }).text || ''
+      if (!text) return
+      // 60db two-phase: is_final=true + speech_final=false is the fast first emit;
+      // speech_final=true is the canonical answer. We mirror Deepgram-style by
+      // treating only speech_final as the "final" event, everything else as partial.
+      const speechFinal = !!(msg as { speech_final?: boolean }).speech_final
+      if (speechFinal) {
+        console.log(`[SixtydbProxy] final: ${text.substring(0, 60)}`)
+        sendToClient({ type: 'final', text, raw: msg })
+      } else {
+        sendToClient({ type: 'partial', text, raw: msg })
+      }
+      return
+    }
+
+    if (msg.type === 'error') {
+      const errorMsg = (msg as { error?: string }).error || 'Unknown error'
+      const errorCode = (msg as { error_code?: string }).error_code || 'unknown'
+      console.error(`[SixtydbProxy] upstream error: ${errorCode} - ${errorMsg}`)
+      sendToClient({
+        type: 'error',
+        message: `60db: ${errorCode} - ${errorMsg}`,
+      })
+      return
+    }
+
+    if (msg.type === 'session_stopped') {
+      console.log('[SixtydbProxy] session_stopped')
+    }
+  })
+
+  upstream.on('error', (error) => {
+    console.error('[SixtydbProxy] upstream WebSocket error:', error.message)
+    if (!clientClosed) {
+      sendToClient({
+        type: 'error',
+        message: formatSixtydbConnectionError(error),
+      })
+      clientWs.close(4002, '60db WebSocket error')
+    }
+  })
+
+  upstream.on('close', (code, reason) => {
+    console.log(`[SixtydbProxy] upstream closed: ${code} ${reason}`)
+    if (!clientClosed) {
+      const safeCode = (code === 1000 || (code >= 3000 && code <= 4999)) ? code : 1000
+      clientWs.close(safeCode, reason.toString())
+    }
+  })
+
+  clientWs.on('message', (data: Buffer, isBinary?: boolean) => {
+    if (!sessionReady) {
+      console.warn('[SixtydbProxy] session not ready, dropping data')
+      return
+    }
+    if (upstream.readyState !== NodeWebSocket.OPEN) {
+      console.warn('[SixtydbProxy] upstream not open, dropping data')
+      return
+    }
+
+    // Browser sends raw PCM16 binary frames OR a JSON control message.
+    // When ws reports the frame type, binary frames skip the JSON sniffing
+    // entirely; otherwise fall back to checking for a leading '{'.
+    const control = isBinary === true ? null : parseControlMessage(data)
+
+    if (control === null) {
+      // 60db accepts raw binary frames natively — pass them straight through.
+      upstream.send(data, { binary: true })
+      return
+    }
+
+    if (control.type === 'audio_end' || control.type === 'terminate') {
+      // Tell 60db to wrap up; it will reply with session_stopped + billing summary.
+      upstream.send(JSON.stringify({ type: 'stop' }))
+      console.log('[SixtydbProxy] forwarded stop to upstream')
+    }
+    // Any other JSON control message is dropped, as before.
+  })
+
+  clientWs.on('close', () => {
+    console.log('[SixtydbProxy] client disconnected')
+    clientClosed = true
+    closeUpstream('Client disconnected')
+  })
+
+  clientWs.on('error', (error) => {
+    console.error('[SixtydbProxy] client WebSocket error:', error)
+    clientClosed = true
+    closeUpstream('Client error')
+  })
+}
+
+/** Map a low-level upstream connection error to a user-facing message. */
+function formatSixtydbConnectionError(error: Error): string {
+  const msg = error.message || 'WebSocket connection error'
+  const lower = msg.toLowerCase()
+
+  if (lower.includes('401') || lower.includes('unauthorized')) {
+    return '60db API key is invalid or expired — check your API key setting.'
+  }
+  if (lower.includes('403') || lower.includes('forbidden')) {
+    return '60db API key lacks permission — check your workspace access.'
+  }
+  if (lower.includes('enotfound') || lower.includes('getaddrinfo')) {
+    return 'Could not resolve api.60db.ai — check your network connection.'
+  }
+  if (lower.includes('timeout') || lower.includes('etimedout')) {
+    return 'Connection to 60db timed out — check your network.'
+  }
+  if (lower.includes('econnreset') || lower.includes('socket hang up')) {
+    return '60db connection was reset — please retry.'
+  }
+  return msg
+}
+
+/** Register the 60db connection handler on the given WebSocket server. */
+export function attachSixtydbProxyServer(wss: WebSocketServer): void {
+  wss.on('connection', handleSixtydbConnection)
+}