Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions src/lib/realtime-voice-gateway-relay.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ function inputWithLevel(level: number) {
}

describe("realtime gateway relay barge-in detection", () => {
it("keeps the existing desktop sensitivity", () => {
it("requires sustained desktop speech before barge-in", () => {
let speechFrames = 0;
for (let i = 0; i < DESKTOP_REALTIME_BARGE_IN_PROFILE.frames; i += 1) {
const result = detectRealtimeBargeIn({
input: inputWithLevel(0.09),
input: inputWithLevel(0.11),
activeOutput: true,
cancelRequested: false,
speechFrames,
Expand Down Expand Up @@ -53,32 +53,50 @@ describe("realtime gateway relay barge-in detection", () => {
let speechFrames = 0;
for (let i = 0; i < MOBILE_REALTIME_BARGE_IN_PROFILE.frames - 1; i += 1) {
const result = detectRealtimeBargeIn({
input: inputWithLevel(0.17),
input: inputWithLevel(0.23),
activeOutput: true,
cancelRequested: false,
speechFrames,
outputStartedAtMs: 1_000,
nowMs: 2_000,
nowMs: 2_500,
profile: MOBILE_REALTIME_BARGE_IN_PROFILE,
});
speechFrames = result.speechFrames;
expect(result.triggered).toBe(false);
}

const result = detectRealtimeBargeIn({
input: inputWithLevel(0.17),
input: inputWithLevel(0.23),
activeOutput: true,
cancelRequested: false,
speechFrames,
outputStartedAtMs: 1_000,
nowMs: 2_000,
nowMs: 2_500,
profile: MOBILE_REALTIME_BARGE_IN_PROFILE,
});

expect(result.triggered).toBe(true);
expect(result.suppressInput).toBe(false);
});

// A burst lasting two frames fewer than the mobile profile's `frames` count
// must never report a barge-in, and every frame of that burst must come back
// with `suppressInput` set.
it("does not interrupt mobile output for short speech bursts", () => {
  const burstLength = MOBILE_REALTIME_BARGE_IN_PROFILE.frames - 2;
  let speechFrames = 0;
  let frame = 0;

  while (frame < burstLength) {
    // Feed the running frame count back in, exactly as consecutive calls would.
    const detection = detectRealtimeBargeIn({
      input: inputWithLevel(0.3),
      activeOutput: true,
      cancelRequested: false,
      speechFrames,
      outputStartedAtMs: 1_000,
      nowMs: 2_500,
      profile: MOBILE_REALTIME_BARGE_IN_PROFILE,
    });

    speechFrames = detection.speechFrames;
    expect(detection.triggered).toBe(false);
    expect(detection.suppressInput).toBe(true);

    frame += 1;
  }
});

it("suppresses mobile playback echo until barge-in is confirmed", () => {
const echo = detectRealtimeBargeIn({
input: inputWithLevel(0.08),
Expand Down
14 changes: 7 additions & 7 deletions src/lib/realtime-voice-gateway-relay.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ import { base64ToBytes, bytesToBase64, floatToPcm16, pcm16ToFloat, rmsLevel } fr

export type RealtimeVoiceStatus = "idle" | "listening" | "processing" | "speaking" | "error";

// NOTE(review): this span comes from a diff view that reports
// "7 additions & 7 deletions", so every constant name below appears twice.
// The first group of seven appears to be the removed values — confirm
// against the real file, which should hold a single declaration of each.
const BARGE_IN_RMS_THRESHOLD = 0.02;
const BARGE_IN_PEAK_THRESHOLD = 0.08;
const BARGE_IN_FRAMES = 2;
const MOBILE_BARGE_IN_RMS_THRESHOLD = 0.055;
const MOBILE_BARGE_IN_PEAK_THRESHOLD = 0.16;
const MOBILE_BARGE_IN_FRAMES = 4;
const MOBILE_BARGE_IN_GRACE_MS = 750;
// Second group: presumably the added values. Every value is higher than its
// counterpart above (RMS 0.02 -> 0.03, peak 0.08 -> 0.1, frames 2 -> 3;
// mobile RMS 0.055 -> 0.075, mobile peak 0.16 -> 0.22, mobile frames 4 -> 7,
// mobile grace 750 -> 1200 ms).
const BARGE_IN_RMS_THRESHOLD = 0.03;
const BARGE_IN_PEAK_THRESHOLD = 0.1;
const BARGE_IN_FRAMES = 3;
const MOBILE_BARGE_IN_RMS_THRESHOLD = 0.075;
const MOBILE_BARGE_IN_PEAK_THRESHOLD = 0.22;
const MOBILE_BARGE_IN_FRAMES = 7;
const MOBILE_BARGE_IN_GRACE_MS = 1200;
const REALTIME_VOICE_CONTEXT_LIMIT = 8;

export interface RealtimeBargeInProfile {
Expand Down
Loading