diff --git a/CHANGELOG.md b/CHANGELOG.md index 307f336..3ee00e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- **Deepgram keyterms configuration** — `voice.deepgramKeyterms` lets users + provide words or phrases (for example app names, tools, and product names) + that are sent to Deepgram as `keyterm` query parameters to improve Nova-3 + recognition accuracy. + ## [7.2.2] - 2026-05-01 ### Added diff --git a/README.md b/README.md index 66c6381..f3f1b44 100644 --- a/README.md +++ b/README.md @@ -296,6 +296,24 @@ Settings stored in Pi's settings files under the `voice` key: into `~/.pi/agent/settings.json`. If you paste a key during onboarding, that is an explicit save and it still goes to `~/.env.secrets` or `~/.zshrc`. +### Deepgram keyterms + +When using the Deepgram backend, add `deepgramKeyterms` to bias recognition +toward words or phrases that are frequently misheard, such as app names, +libraries, or product names: + +```json +{ + "voice": { + "backend": "deepgram", + "deepgramKeyterms": ["ffmpeg", "GStreamer", "pi agent"] + } +} +``` + +Each entry is sent to Deepgram as a `keyterm` query parameter. This setting is +ignored by the local/offline backend. + --- ## Troubleshooting diff --git a/extensions/voice/config.ts b/extensions/voice/config.ts index befcac6..8289743 100644 --- a/extensions/voice/config.ts +++ b/extensions/voice/config.ts @@ -39,6 +39,8 @@ export interface VoiceConfig { localEndpoint?: string; /** Global-only shortcut used to toggle recording without hold-to-talk */ toggleShortcut?: string; + /** Deepgram Nova-3 keyterms: words/phrases to bias recognition toward. */ + deepgramKeyterms?: string[]; // ─── TTS (text-to-speech) ───────────────────────────────────────── // All TTS fields are opt-in (default: TTS disabled). New in v6.0.0. @@ -125,6 +127,7 @@ export const DEFAULT_CONFIG: VoiceConfig = { localModel: undefined, localEndpoint: undefined, toggleShortcut: "ctrl+shift+v", + deepgramKeyterms: [], // TTS defaults — all opt-in ttsEnabled: false, ttsBackend: "local", @@ -197,6 +200,9 @@ function migrateConfig(rawVoice: any, source: VoiceConfigSource): VoiceConfig { toggleShortcut: source !== "project" && typeof rawVoice.toggleShortcut === "string" ? rawVoice.toggleShortcut : DEFAULT_CONFIG.toggleShortcut, + deepgramKeyterms: Array.isArray(rawVoice.deepgramKeyterms) + ? rawVoice.deepgramKeyterms.filter((term: unknown): term is string => typeof term === "string" && term.trim().length > 0) + : DEFAULT_CONFIG.deepgramKeyterms, // TTS fields — type-validated; mismatched persisted values fall // back to safe defaults so a hand-edited config can't poison the // engine. Notably: ttsLocalVoiceId rejects strings (would crash diff --git a/extensions/voice/deepgram.ts b/extensions/voice/deepgram.ts index a7e4f26..c8689a4 100644 --- a/extensions/voice/deepgram.ts +++ b/extensions/voice/deepgram.ts @@ -25,6 +25,10 @@ export function buildDeepgramWsUrl(config: VoiceConfig): string { smart_format: "true", interim_results: "true", }); + for (const keyterm of config.deepgramKeyterms ?? []) { + const normalized = keyterm.trim(); + if (normalized) params.append("keyterm", normalized); + } return `${DEEPGRAM_WS_URL}?${params.toString()}`; } diff --git a/tests/config.test.ts b/tests/config.test.ts index 24f22b2..8ab9e87 100644 --- a/tests/config.test.ts +++ b/tests/config.test.ts @@ -77,6 +77,19 @@ describe("loadConfigWithSource", () => { expect(result.config.onboarding.completed).toBe(false); }); + test("loads Deepgram keyterms from settings", () => { + const cwd = makeTempDir(); + const agentDir = path.join(cwd, "agent-home"); + writeSettings(agentDir, "settings.json", { + enabled: true, + deepgramKeyterms: ["Raycast", "", " ", "VS Code"], + }); + + const result = loadConfigWithSource(cwd, { agentDir }); + + expect(result.config.deepgramKeyterms).toEqual(["Raycast", "VS Code"]); + }); + test("prefers project config over global config and preserves project scope", () => { const cwd = makeTempDir(); const agentDir = path.join(cwd, "agent-home"); diff --git a/tests/deepgram.test.ts b/tests/deepgram.test.ts new file mode 100644 index 0000000..942d200 --- /dev/null +++ b/tests/deepgram.test.ts @@ -0,0 +1,24 @@ +import { describe, expect, test } from "bun:test"; +import { DEFAULT_CONFIG } from "../extensions/voice/config"; +import { buildDeepgramWsUrl } from "../extensions/voice/deepgram"; + +describe("buildDeepgramWsUrl", () => { + test("adds Deepgram keyterms from config", () => { + const url = new URL(buildDeepgramWsUrl({ + ...DEFAULT_CONFIG, + language: "pt-BR", + deepgramKeyterms: ["Raycast", "Linear", "VS Code"], + })); + + expect(url.searchParams.getAll("keyterm")).toEqual(["Raycast", "Linear", "VS Code"]); + }); + + test("skips blank Deepgram keyterms", () => { + const url = new URL(buildDeepgramWsUrl({ + ...DEFAULT_CONFIG, + deepgramKeyterms: ["", " ", "Cursor"], + })); + + expect(url.searchParams.getAll("keyterm")).toEqual(["Cursor"]); + }); +});