Skip to content

Commit

Permalink
feat: spec out the api docs
Browse files Browse the repository at this point in the history
  • Loading branch information
naomi-lgbt committed Oct 19, 2024
1 parent 753be7e commit f2f6e98
Show file tree
Hide file tree
Showing 5 changed files with 342 additions and 0 deletions.
24 changes: 24 additions & 0 deletions src/lib/enums/AgentEvents.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
 * Event names emitted by the agent live client.
 *
 * Every member's value is identical to its key, so an incoming message's
 * `type` string can be looked up directly against this enum.
 */
export enum AgentEvents {
  /**
   * Socket lifecycle events (connection opened, closed, or errored).
   */
  Open = "Open",
  Close = "Close",
  Error = "Error",

  /**
   * Message events. Each arrives as a JSON message shaped like
   * `{ type: string }`, where `type` equals the member name.
   */
  Welcome = "Welcome",
  ConversationText = "ConversationText",
  UserStartedSpeaking = "UserStartedSpeaking",
  AgentThinking = "AgentThinking",
  FunctionCalling = "FunctionCalling",
  AgentStartedSpeaking = "AgentStartedSpeaking",
  AgentAudioDone = "AgentAudioDone",
  InjectionRefused = "InjectionRefused",

  /**
   * Fallback for any message whose `type` is not listed above.
   */
  Unhandled = "Unhandled",
}
190 changes: 190 additions & 0 deletions src/lib/types/AgentLiveSchema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// TODO: We could probably use this elsewhere?

/** Container choices shared by the linear16, mulaw and alaw encodings. */
type WavOrNoContainer = "wav" | "none";

/**
 * Output audio format. Each union member pairs one `encoding` with the
 * container / sample-rate / bitrate fields that are valid for it.
 */
type AudioFormat =
  | { encoding: "linear16"; container: WavOrNoContainer; sampleRate: 8000 | 16000 | 24000 | 32000 | 48000 }
  | { encoding: "mulaw"; container: WavOrNoContainer; sampleRate: 8000 | 16000 }
  | { encoding: "alaw"; container: WavOrNoContainer; sampleRate: 8000 | 16000 }
  | { encoding: "mp3"; bitrate: 32000 | 48000 }
  | {
      encoding: "opus";
      container: "ogg";
      /** Must be between 4000 and 650000, inclusive. */
      bitrate: number;
    }
  | { encoding: "flac"; sampleRate: 8000 | 16000 | 22050 | 32000 | 48000 }
  | {
      encoding: "aac";
      /** Must be between 4000 and 192000, inclusive. */
      bitrate: number;
    };

/** Model names in the `nova-2` family. */
type Nova2ListenModel =
  | "nova-2"
  | "nova-2-meeting"
  | "nova-2-phonecall"
  | "nova-2-voicemail"
  | "nova-2-finance"
  | "nova-2-conversational"
  | "nova-2-video"
  | "nova-2-medical"
  | "nova-2-drivethru"
  | "nova-2-automotive"
  | "nova-2-atc";

/** Model names in the `nova` family. */
type NovaListenModel = "nova" | "nova-phonecall";

/** Model names in the `enhanced` family. */
type EnhancedListenModel =
  | "enhanced"
  | "enhanced-meeting"
  | "enhanced-phonecall"
  | "enhanced-finance";

/** Model names in the `base` family. */
type BaseListenModel =
  | "base"
  | "base-meeting"
  | "base-phonecall"
  | "base-voicemail"
  | "base-finance"
  | "base-conversational"
  | "base-video";

/** Model names in the `whisper` family. */
type WhisperListenModel =
  | "whisper-tiny"
  | "whisper"
  | "whisper-small"
  | "whisper-medium"
  | "whisper-large";

/**
 * Every model name accepted for the agent's `listen` setting:
 * the union of all families above.
 */
type ListenModel =
  | Nova2ListenModel
  | NovaListenModel
  | EnhancedListenModel
  | BaseListenModel
  | WhisperListenModel;

/**
 * Every model name accepted for the agent's `speak` setting.
 */
type SpeakModel =
  | "aura-asteria-en" | "aura-luna-en" | "aura-stella-en"
  | "aura-athena-en" | "aura-hera-en" | "aura-orion-en"
  | "aura-arcas-en" | "aura-perseus-en" | "aura-angus-en"
  | "aura-orpheus-en" | "aura-helios-en" | "aura-zeus-en";

/**
 * Description of a function the LLM can be given, as carried in
 * `ThinkModel.functions`.
 */
interface ThinkModelFunction {
  name: string;
  description: string;
  url: string;
  /**
   * Exactly one header entry, and its key must be `"authorization"`.
   */
  headers: [
    {
      key: "authorization";
      value: string;
    }
  ];
  method: "POST";
  parameters: {
    type: string;
    /**
     * Parameter definitions keyed by parameter name.
     */
    properties: Record<
      string,
      {
        type: string;
        description: string;
      }
    >;
  };
}

/**
 * LLM configuration for the agent. Each union member pairs a provider with
 * the model name(s) that may be used with it.
 */
type ThinkModel =
  | {
      provider: {
        type: "open_ai";
      };
      model: "gpt-4o-mini";
      instructions: string;
      functions: ThinkModelFunction[];
    }
  | {
      provider: {
        type: "anthropic";
      };
      model: "claude-3-haiku-20240307";
      instructions: string;
      functions: ThinkModelFunction[];
    }
  | {
      provider: {
        type: "groq";
      };
      /**
       * Previously typed as the literal `""`, which rejected every real model
       * name. Widened to `string` so a model can actually be specified.
       * NOTE(review): narrow this to the supported Groq model names once they
       * are confirmed against the API reference.
       */
      model: string;
      instructions: string;
      functions: ThinkModelFunction[];
    }
  | {
      provider: {
        type: "custom";
        url: string;
        key: string;
      };
      model: string;
      instructions: string;
      functions: ThinkModelFunction[];
    };

/**
 * Settings object passed to `AgentLiveClient.configure`.
 *
 * @see https://developers.deepgram.com/reference/voicebot-api-phase-preview#settingsconfiguration
 */
interface AgentLiveSchema extends Record<string, unknown> {
  audio: {
    /**
     * Format of the inbound (user) audio.
     */
    input?: {
      encoding: string;
      sampleRate: number;
    };
    /**
     * Format of the outbound (agent) audio.
     *
     * @see https://developers.deepgram.com/docs/tts-media-output-settings#audio-format-combinations
     */
    output?: AudioFormat;
  };
  agent: {
    listen: {
      /**
       * @see https://developers.deepgram.com/docs/model
       */
      model: ListenModel;
    };
    speak: {
      /**
       * @see https://developers.deepgram.com/docs/tts-models
       */
      model: SpeakModel;
    };
    /**
     * @see https://developers.deepgram.com/reference/voicebot-api-phase-preview#supported-llm-providers-and-models
     */
    think: ThinkModel;
  };
  context: {
    /**
     * LLM message history (e.g. to restore existing conversation if websocket disconnects)
     *
     * Previously typed as `[]` (the empty tuple), which only ever accepted an
     * empty array and so made it impossible to pass any history.
     * NOTE(review): entries are presumably `{ role, content }` objects —
     * confirm against the API reference and tighten the element type.
     */
    messages: Record<string, unknown>[];
    /**
     * Whether to replay the last message, if it is an assistant message.
     */
    replay: boolean;
  };
}

export type { AgentLiveSchema, SpeakModel };
1 change: 1 addition & 0 deletions src/lib/types/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
export * from "./AgentLiveSchema";
export * from "./AnalyzeSchema";
export * from "./AsyncAnalyzeResponse";
export * from "./AsyncPrerecordedResponse";
Expand Down
126 changes: 126 additions & 0 deletions src/packages/AgentLiveClient.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import { AgentEvents } from "../lib/enums/AgentEvents.js";
import type { AgentLiveSchema, SpeakModel } from "../lib/types";
import type { DeepgramClientOptions } from "../lib/types";
import { AbstractLiveClient } from "./AbstractLiveClient";

/**
 * Live (WebSocket) client for the agent API.
 *
 * Connects on construction, re-emits socket lifecycle events and incoming
 * JSON messages as `AgentEvents`, and exposes helpers that send JSON control
 * messages over the socket.
 */
export class AgentLiveClient extends AbstractLiveClient {
  public namespace: string = "agent";

  /**
   * @param options - Client options forwarded to the base live client.
   * @param endpoint - Endpoint path passed to `connect`.
   */
  constructor(options: DeepgramClientOptions, endpoint: string = ":version/agent") {
    super(options);
    /**
     * According to the docs, this is the correct base URL for the Agent API.
     * TODO: Make configurable for self-hosted customers.
     */
    this.baseUrl = "wss://agent.deepgram.com";

    /**
     * TODO: Not sure we should send the options here.
     * Think that needs to happen after Websocket is open.
     */
    this.connect({}, endpoint);
  }

  /**
   * Sets up the connection event handlers.
   * This method is responsible for handling the various events that can occur on the WebSocket connection, such as opening, closing, and receiving messages.
   * - When the connection is opened, it emits the `AgentEvents.Open` event.
   * - When the connection is closed, it emits the `AgentEvents.Close` event.
   * - When an error occurs on the connection, it emits the `AgentEvents.Error` event.
   * - When a message is received, it parses the message as JSON and emits the
   *   event named by its `type`, or `AgentEvents.Unhandled` when the `type` is
   *   missing or not a member of `AgentEvents`.
   */
  public setupConnection(): void {
    if (this.conn) {
      this.conn.onopen = () => {
        this.emit(AgentEvents.Open, this);
      };

      this.conn.onclose = (event: any) => {
        this.emit(AgentEvents.Close, event);
      };

      this.conn.onerror = (event: ErrorEvent) => {
        this.emit(AgentEvents.Error, event);
      };

      this.conn.onmessage = (event: MessageEvent) => {
        try {
          const data: any = JSON.parse(event.data.toString());

          // `JSON.parse` can legitimately return `null`; reading `.type` from
          // it would throw and be misreported below as a parse failure.
          const type: unknown = data == null ? undefined : data.type;

          // Own-property check rather than `in`: `in` also walks the prototype
          // chain, so a type such as "toString" or "constructor" would have
          // been treated as a known event.
          const isKnownEvent =
            typeof type === "string" && Object.prototype.hasOwnProperty.call(AgentEvents, type);

          if (isKnownEvent) {
            this.emit(type as string, data);
          } else {
            this.emit(AgentEvents.Unhandled, data);
          }
        } catch (error) {
          // NOTE(review): non-text frames will most likely fail JSON parsing
          // and land here too — confirm whether the API sends binary audio on
          // this socket and, if so, route it to a dedicated event.
          this.emit(AgentEvents.Error, {
            event,
            message: "Unable to parse `data` as JSON.",
            error,
          });
        }
      };
    }
  }

  /**
   * To be called with your model configuration BEFORE sending
   * any audio data.
   *
   * The settings are sent as top-level fields of the `SettingsConfiguration`
   * message (alongside `type`), not nested under an `options` key.
   * NOTE(review): the API reference appears to use snake_case field names
   * (e.g. `sample_rate`); confirm whether `sampleRate` needs converting
   * before it is sent.
   *
   * @param options - The SettingsConfiguration object.
   * @param options.audio.input.encoding - The encoding for your inbound (user) audio.
   * @param options.audio.input.sampleRate - The sample rate for your inbound (user) audio.
   * @param options.audio.output.encoding - The encoding for your outbound (agent) audio.
   * @param options.audio.output.sampleRate - The sample rate for your outbound (agent) audio.
   * @param options.audio.output.bitrate - The bitrate for your outbound (agent) audio.
   * @param options.audio.output.container - The container for your outbound (agent) audio.
   * @param options.agent.listen.model - The STT model to use for processing user audio.
   * @param options.agent.speak.model - The TTS model to use for generating agent audio.
   * @param options.agent.think.provider.type - The LLM provider to use.
   * @param options.agent.think.model - The LLM model to use.
   * @param options.agent.think.instructions - The instructions to provide to the LLM.
   * @param options.agent.think.functions - The functions to provide to the LLM.
   * @param options.context.messages - The message history to provide to the LLM (useful if a websocket connection is lost.)
   * @param options.context.replay - Whether to replay the last message if it was an assistant message.
   */
  public configure(options: AgentLiveSchema): void {
    // `type` is written last so that a stray `type` key in `options` cannot
    // overwrite the message discriminator.
    this.send(JSON.stringify({ ...options, type: "SettingsConfiguration" }));
  }

  /**
   * Provide new instructions to the LLM.
   * @param instructions - The instructions to provide.
   */
  public updateInstructions(instructions: string): void {
    this.send(JSON.stringify({ type: "UpdateInstructions", instructions }));
  }

  /**
   * Change the speak model.
   * @param model - The new model to use.
   */
  public updateSpeak(model: SpeakModel): void {
    this.send(JSON.stringify({ type: "UpdateSpeak", model }));
  }

  /**
   * Immediately trigger an agent message. If this message
   * is sent while the user is speaking, or while the server is in the
   * middle of sending audio, then the request will be ignored and an InjectionRefused
   * event will be emitted.
   * @example "Hold on while I look that up for you."
   * @example "Are you still on the line?"
   * @param message - The message to speak.
   */
  public injectAgentMessage(message: string): void {
    this.send(JSON.stringify({ type: "InjectAgentMessage", message }));
  }

  /**
   * Send a keepalive to avoid closing the websocket while you
   * are not transmitting audio. This should be sent at least
   * every 8 seconds.
   */
  public keepAlive(): void {
    this.send(JSON.stringify({ type: "KeepAlive" }));
  }
}
1 change: 1 addition & 0 deletions src/packages/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
export * from "./AbstractClient";
export * from "./AbstractLiveClient";
export * from "./AbstractRestClient";
export * from "./AgentLiveClient";
export * from "./ListenClient";
export * from "./ListenLiveClient";
export * from "./ListenRestClient";
Expand Down

0 comments on commit f2f6e98

Please sign in to comment.