Merge pull request #363 from deepgram/feat/nova-3

feat: support nova 3 and keyterms
deepgram · Feb 11, 2025 · 9c75aaf · 9c75aaf
2 parents 393a5c8 + 90e95dd
commit 9c75aaf
Show file tree

Hide file tree

Showing 12 changed files with 52 additions and 7 deletions.
diff --git a/Deepgram/Clients/Agent/v2/Websocket/Client.cs b/Deepgram/Clients/Agent/v2/Websocket/Client.cs
@@ -7,6 +7,7 @@
 using Deepgram.Models.Agent.v2.WebSocket;
 using Common = Deepgram.Models.Common.v2.WebSocket;
 using Deepgram.Clients.Interfaces.v2;
+using Deepgram.Models.Exceptions.v1;
 
 namespace Deepgram.Clients.Agent.v2.WebSocket;
 
@@ -51,6 +52,10 @@ public Client(string? apiKey = null, IDeepgramClientOptions? options = null) : b
     public async Task<bool> Connect(SettingsConfigurationSchema options, CancellationTokenSource? cancelToken = null, Dictionary<string, string>? addons = null,
         Dictionary<string, string>? headers = null)
     {
+        if (!options.Agent.Listen.Model.StartsWith("nova-3") && options.Agent.Listen.Keyterms?.Count > 0)
+        {
+            throw new DeepgramException("Keyterms is only supported in Nova 3 models.");
+        }
         Log.Verbose("AgentWSClient.Connect", "ENTER");
         Log.Information("Connect", $"options:\n{JsonSerializer.Serialize(options, JsonSerializeOptions.DefaultOptions)}");
         Log.Debug("Connect", $"addons: {addons}");

diff --git a/Deepgram/Clients/Listen/v2/WebSocket/Client.cs b/Deepgram/Clients/Listen/v2/WebSocket/Client.cs
@@ -8,6 +8,7 @@
 using Deepgram.Models.Listen.v2.WebSocket;
 using Common = Deepgram.Models.Common.v2.WebSocket;
 using Deepgram.Clients.Interfaces.v2;
+using Deepgram.Models.Exceptions.v1;
 
 namespace Deepgram.Clients.Listen.v2.WebSocket;
 
@@ -49,6 +50,10 @@ public Client(string? apiKey = null, IDeepgramClientOptions? options = null) : b
     public async Task<bool> Connect(LiveSchema options, CancellationTokenSource? cancelToken = null, Dictionary<string, string>? addons = null,
         Dictionary<string, string>? headers = null)
     {
+        if (!options.Model.StartsWith("nova-3") && options.Keyterms?.Count > 0)
+        {
+            throw new DeepgramException("Keyterms is only supported in Nova 3 models.");
+        }
         Log.Verbose("ListenWSClient.Connect", "ENTER");
         Log.Information("Connect", $"options:\n{JsonSerializer.Serialize(options, JsonSerializeOptions.DefaultOptions)}");
         Log.Debug("Connect", $"addons: {addons}");

diff --git a/Deepgram/Models/Agent/v2/WebSocket/Listen.cs b/Deepgram/Models/Agent/v2/WebSocket/Listen.cs
@@ -8,7 +8,11 @@ public record Listen
 {
     [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
     [JsonPropertyName("model")]
-    public string Model { get; set; } = "nova-2";
+    public string Model { get; set; }
+
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("keyterms")]
+    public List<string>? Keyterms { get; set; }
 
     /// <summary>
     /// Override ToString method to serialize the object

diff --git a/Deepgram/Models/Listen/v1/REST/PreRecordedSchema.cs b/Deepgram/Models/Listen/v1/REST/PreRecordedSchema.cs
@@ -148,6 +148,14 @@ public class PreRecordedSchema
 	[JsonPropertyName("keywords")]
     public List<string>? Keywords { get; set; }
 
+    /// <summary>
+    /// Keyterm Prompting allows you to improve Keyword Recall Rate (KRR) for important keyterms or phrases up to 90%.
+    /// <see href="https://developers.deepgram.com/docs/keyterm"/>
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("keyterms")]
+    public List<string>? Keyterms { get; set; }
+
     /// <summary>
     /// Primary spoken language of submitted audio 
     /// <see href="https://developers.deepgram.com/docs/language">

diff --git a/Deepgram/Models/Listen/v2/WebSocket/LiveSchema.cs b/Deepgram/Models/Listen/v2/WebSocket/LiveSchema.cs
@@ -117,6 +117,14 @@ public class LiveSchema
 	[JsonPropertyName("keywords")]
     public List<string>? Keywords { get; set; }
 
+    /// <summary>
+    /// Keyterm Prompting allows you to improve Keyword Recall Rate (KRR) for important keyterms or phrases up to 90%.
+    /// <see href="https://developers.deepgram.com/docs/keyterm"/>
+    /// </summary>
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    [JsonPropertyName("keyterms")]
+    public List<string>? Keyterms { get; set; }
+
     /// <summary>
     /// Primary spoken language of submitted audio 
     /// <see href="https://developers.deepgram.com/docs/language">

diff --git a/examples/agent/websocket/simple/Program.cs b/examples/agent/websocket/simple/Program.cs
@@ -6,6 +6,7 @@
 using Deepgram.Microphone;
 using Deepgram.Models.Authenticate.v1;
 using Deepgram.Models.Agent.v2.WebSocket;
+using System.Collections.Generic;
 
 namespace SampleApp
 {
@@ -194,6 +195,8 @@ await agentClient.Subscribe(new EventHandler<ErrorResponse>((sender, e) =>
                 settingsConfiguration.Audio.Input.SampleRate = 44100;
                 settingsConfiguration.Context.Messages = new List<object> {};
                 settingsConfiguration.Context.Replay = false;
+                settingsConfiguration.Agent.Listen.Model = "nova-3";
+                settingsConfiguration.Agent.Listen.Keyterms = new List<string> { "Deepgram" };
 
                 bool bConnected = await agentClient.Connect(settingsConfiguration);
                 if (!bConnected)

diff --git a/examples/speech-to-text/rest/file/Program.cs b/examples/speech-to-text/rest/file/Program.cs
@@ -4,6 +4,7 @@
 
 using Deepgram.Logger;
 using Deepgram.Models.Listen.v1.REST;
+using System.Collections.Generic;
 
 namespace PreRecorded
 {
@@ -36,7 +37,8 @@ static async Task Main(string[] args)
                 audioData,
                 new PreRecordedSchema()
                 {
-                    Model = "nova-2",
+                    Model = "nova-3",
+                    Keyterms = new List<string> { "Bueller" },
                     Punctuate = true,
                 },
                 cancelToken);

diff --git a/examples/speech-to-text/rest/intent/Program.cs b/examples/speech-to-text/rest/intent/Program.cs
@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: MIT
 
 using Deepgram.Models.Listen.v1.REST;
+using System.Collections.Generic;
 
 namespace PreRecorded
 {
@@ -29,7 +30,8 @@ static async Task Main(string[] args)
                 audioData,
                 new PreRecordedSchema()
                 {
-                    Model = "nova-2",
+                    Model = "nova-3",
+                    Keyterms = new List<string> { "Call Center" },
                     Punctuate = true,
                     Intents = true,
                 });

diff --git a/examples/speech-to-text/rest/sentiment/Program.cs b/examples/speech-to-text/rest/sentiment/Program.cs
@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: MIT
 
 using Deepgram.Models.Listen.v1.REST;
+using System.Collections.Generic;
 
 namespace PreRecorded
 {
@@ -29,7 +30,8 @@ static async Task Main(string[] args)
                 audioData,
                 new PreRecordedSchema()
                 {
-                    Model = "nova-2",
+                    Model = "nova-3",
+                    Keyterms = new List<string> { "Call Center" },
                     Punctuate = true,
                     Utterances = true,
                     Sentiment = true,

diff --git a/examples/speech-to-text/rest/summary/Program.cs b/examples/speech-to-text/rest/summary/Program.cs
@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: MIT
 
 using Deepgram.Models.Listen.v1.REST;
+using System.Collections.Generic;
 
 namespace PreRecorded
 {
@@ -29,7 +30,8 @@ static async Task Main(string[] args)
                 audioData,
                 new PreRecordedSchema()
                 {
-                    Model = "nova-2",
+                    Model = "nova-3",
+                    Keyterms = new List<string> { "Call Center" },
                     Punctuate = true,
                     Summarize = "v2",
                 });

diff --git a/examples/speech-to-text/rest/topic/Program.cs b/examples/speech-to-text/rest/topic/Program.cs
@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: MIT
 
 using Deepgram.Models.Listen.v1.REST;
+using System.Collections.Generic;
 
 namespace PreRecorded
 {
@@ -29,7 +30,8 @@ static async Task Main(string[] args)
                 audioData,
                 new PreRecordedSchema()
                 {
-                    Model = "nova-2",
+                    Model = "nova-3",
+                    Keyterms = new List<string> { "Call Center" },
                     Punctuate = true,
                     Topics = true,
                 });

diff --git a/examples/speech-to-text/rest/url/Program.cs b/examples/speech-to-text/rest/url/Program.cs
@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: MIT
 
 using Deepgram.Models.Listen.v1.REST;
+using System.Collections.Generic;
 
 namespace PreRecorded
 {
@@ -25,7 +26,8 @@ static async Task Main(string[] args)
                 new UrlSource("https://dpgr.am/bueller.wav"),
                 new PreRecordedSchema()
                 {
-                    Model = "nova-2",
+                    Model = "nova-3",
+                    Keyterms = new List<string> { "Bueller" },
                 },
                 null, // use the default timeout
                 customOptions);