Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Functionality for Closing Stream (w/o Cancel) Allow Transcription #343

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Deepgram/Clients/Interfaces/v1/IListenWebSocketClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public interface IListenWebSocketClient
public Task Connect(LiveSchema options, CancellationTokenSource? cancelToken = null, Dictionary<string, string>? addons = null,
Dictionary<string, string>? headers = null);

public Task Stop(CancellationTokenSource? cancelToken = null);
public Task Stop(CancellationTokenSource? cancelToken = null, bool nullByte = false);
#endregion

#region Subscribe Event
Expand Down
4 changes: 2 additions & 2 deletions Deepgram/Clients/Interfaces/v1/ISpeakWebSocketClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public interface ISpeakWebSocketClient
public Task Connect(SpeakSchema options, CancellationTokenSource? cancelToken = null, Dictionary<string, string>? addons = null,
Dictionary<string, string>? headers = null);

public Task Stop(CancellationTokenSource? cancelToken = null);
public Task Stop(CancellationTokenSource? cancelToken = null, bool nullByte = false);
#endregion

#region Subscribe Event
Expand Down Expand Up @@ -97,7 +97,7 @@ public Task Connect(SpeakSchema options, CancellationTokenSource? cancelToken =
///// <summary>
///// This method tells Deepgram to initiate the close server-side.
///// </summary>
public void Close();
public void Close(bool nullByte = false);

///// <summary>
///// This method sends a binary message over the WebSocket connection.
Expand Down
19 changes: 5 additions & 14 deletions Deepgram/Clients/Listen/v1/WebSocket/Client.cs
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,7 @@
/// Closes the Web Socket connection to the Deepgram API
/// </summary>
/// <returns>The task object representing the asynchronous operation.</returns>
public async Task Stop(CancellationTokenSource? cancelToken = null)
public async Task Stop(CancellationTokenSource? cancelToken = null, bool nullByte = false)
{
Log.Verbose("ListenWSClient.Stop", "ENTER");

Expand All @@ -842,25 +842,16 @@

try
{
// cancel the internal token to stop all threads
if (_cancellationTokenSource != null)
{
Log.Debug("Stop", "Cancelling native token...");
_cancellationTokenSource.Cancel();
}

// if websocket is open, send a close message
if (_clientWebSocket!.State == WebSocketState.Open)
{
Log.Debug("Stop", "Sending Close message...");
// send a close to Deepgram
lock (_mutexSend)
{
_clientWebSocket.SendAsync(new ArraySegment<byte>([0]), WebSocketMessageType.Binary, true, cancelToken.Token)
.ConfigureAwait(false);
}
SendClose(nullByte);
}

// small delay to wait for any final transcription
await Task.Delay(100, cancelToken.Token).ConfigureAwait(false);

davidvonthenen marked this conversation as resolved.
Show resolved Hide resolved
// send a CloseResponse event
if (_closeReceived != null)
{
Expand Down Expand Up @@ -1006,7 +997,7 @@

if (_deepgramClientOptions.AutoFlushReplyDelta > 0)
{
if ((bool)resultResponse.IsFinal)

Check warning on line 1000 in Deepgram/Clients/Listen/v1/WebSocket/Client.cs

View workflow job for this annotation

GitHub Actions / build

Nullable value type may be null.

Check warning on line 1000 in Deepgram/Clients/Listen/v1/WebSocket/Client.cs

View workflow job for this annotation

GitHub Actions / test (6.0.x)

Nullable value type may be null.

Check warning on line 1000 in Deepgram/Clients/Listen/v1/WebSocket/Client.cs

View workflow job for this annotation

GitHub Actions / test (7.0.x)

Nullable value type may be null.

Check warning on line 1000 in Deepgram/Clients/Listen/v1/WebSocket/Client.cs

View workflow job for this annotation

GitHub Actions / test (8.0.x)

Nullable value type may be null.
{
var now = DateTime.Now;
Log.Debug("InspectMessage", $"AutoFlush IsFinal received. Time: {now}");
Expand Down
35 changes: 19 additions & 16 deletions Deepgram/Clients/Speak/v1/WebSocket/Client.cs
Original file line number Diff line number Diff line change
Expand Up @@ -345,11 +345,23 @@ public void Clear()
/// <summary>
/// This method tells Deepgram to initiate the close server-side.
/// </summary>
/// <param name="nullByte">
/// When true (and the socket object exists), a single zero byte is sent as a binary
/// message instead of the Close control message. Defaults to false.
/// </param>
// NOTE(review): this is a diff view without +/- markers — the parameterless signature
// appears to be the removed line and the one below it the replacement; confirm.
public void Close()
public void Close(bool nullByte = false)
{
// NOTE(review): the log tag reads "SendFinalize" although this method is Close —
// looks copy-pasted from another method; confirm the intended tag.
Log.Debug("SendFinalize", "Sending Close Message Immediately...");
if (nullByte && _clientWebSocket != null)
{
// send a close to Deepgram
// The send is issued while holding _mutexSend. The task returned by SendAsync is not
// awaited (this method is synchronous), so completion or failure of the send is not
// observed here.
// NOTE(review): _cancellationTokenSource is dereferenced without a null check, unlike
// _clientWebSocket above — confirm it is always set by the time Close can be called.
lock (_mutexSend)
{
_clientWebSocket.SendAsync(new ArraySegment<byte>([0]), WebSocketMessageType.Binary, true, _cancellationTokenSource.Token)
.ConfigureAwait(false);
}
return;
}

// Default path: build the Close control message and send its UTF-8 bytes.
ControlMessage controlMessage = new ControlMessage(Constants.Close);
byte[] byteArray = Encoding.UTF8.GetBytes(controlMessage.ToString());
// NOTE(review): in this diff view Send(...) appears to be the removed call and
// SendMessageImmediately(...) its replacement; confirm only one remains in the file.
Send(byteArray);
SendMessageImmediately(byteArray);
}

/// <summary>
Expand Down Expand Up @@ -860,7 +872,7 @@ internal void ProcessDataReceived(WebSocketReceiveResult result, MemoryStream ms
/// Closes the Web Socket connection to the Deepgram API
/// </summary>
/// <returns>The task object representing the asynchronous operation.</returns>
public async Task Stop(CancellationTokenSource? cancelToken = null)
public async Task Stop(CancellationTokenSource? cancelToken = null, bool nullByte = false)
{
Log.Verbose("SpeakWSClient.Stop", "ENTER");

Expand All @@ -880,25 +892,16 @@ public async Task Stop(CancellationTokenSource? cancelToken = null)

try
{
// cancel the internal token to stop all threads
if (_cancellationTokenSource != null)
{
Log.Debug("Stop", "Cancelling native token...");
_cancellationTokenSource.Cancel();
}

// if websocket is open, send a close message
if (_clientWebSocket!.State == WebSocketState.Open)
{
Log.Debug("Stop", "Sending Close message...");
// send a close to Deepgram
lock (_mutexSend)
{
_clientWebSocket.SendAsync(new ArraySegment<byte>([0]), WebSocketMessageType.Binary, true, cancelToken.Token)
.ConfigureAwait(false);
}
Close(nullByte);
}

// small delay to wait for any final audio
await Task.Delay(100, cancelToken.Token).ConfigureAwait(false);

// send a CloseResponse event
if (_closeReceived != null)
{
Expand Down
74 changes: 72 additions & 2 deletions examples/text-to-speech/websocket/simple/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ static async Task Main(string[] args)
//var speakClient = ClientFactory.CreateSpeakWebSocketClient("", options);
var speakClient = ClientFactory.CreateSpeakWebSocketClient();

// append wav header only once
bool appendWavHeader = true;

// Subscribe to the EventResponseReceived event
speakClient.Subscribe(new EventHandler<OpenResponse>((sender, e) =>
{
Expand All @@ -41,9 +44,72 @@ static async Task Main(string[] args)
{
Console.WriteLine($"----> {e.Type} received");

// add a wav header
if (appendWavHeader)
{
using (BinaryWriter writer = new BinaryWriter(File.Open("output.wav", FileMode.Append)))
{
Console.WriteLine("Adding WAV header to output.wav");
byte[] wavHeader = new byte[44];
int sampleRate = 48000;
short bitsPerSample = 16;
short channels = 1;
int byteRate = sampleRate * channels * (bitsPerSample / 8);
short blockAlign = (short)(channels * (bitsPerSample / 8));

wavHeader[0] = 0x52; // R
wavHeader[1] = 0x49; // I
wavHeader[2] = 0x46; // F
wavHeader[3] = 0x46; // F
wavHeader[4] = 0x00; // Placeholder for file size (will be updated later)
wavHeader[5] = 0x00; // Placeholder for file size (will be updated later)
wavHeader[6] = 0x00; // Placeholder for file size (will be updated later)
wavHeader[7] = 0x00; // Placeholder for file size (will be updated later)
wavHeader[8] = 0x57; // W
wavHeader[9] = 0x41; // A
wavHeader[10] = 0x56; // V
wavHeader[11] = 0x45; // E
wavHeader[12] = 0x66; // f
wavHeader[13] = 0x6D; // m
wavHeader[14] = 0x74; // t
wavHeader[15] = 0x20; // Space
wavHeader[16] = 0x10; // Subchunk1Size (16 for PCM)
wavHeader[17] = 0x00; // Subchunk1Size
wavHeader[18] = 0x00; // Subchunk1Size
wavHeader[19] = 0x00; // Subchunk1Size
wavHeader[20] = 0x01; // AudioFormat (1 for PCM)
wavHeader[21] = 0x00; // AudioFormat
wavHeader[22] = (byte)channels; // NumChannels
wavHeader[23] = 0x00; // NumChannels
wavHeader[24] = (byte)(sampleRate & 0xFF); // SampleRate
wavHeader[25] = (byte)((sampleRate >> 8) & 0xFF); // SampleRate
wavHeader[26] = (byte)((sampleRate >> 16) & 0xFF); // SampleRate
wavHeader[27] = (byte)((sampleRate >> 24) & 0xFF); // SampleRate
wavHeader[28] = (byte)(byteRate & 0xFF); // ByteRate
wavHeader[29] = (byte)((byteRate >> 8) & 0xFF); // ByteRate
wavHeader[30] = (byte)((byteRate >> 16) & 0xFF); // ByteRate
wavHeader[31] = (byte)((byteRate >> 24) & 0xFF); // ByteRate
wavHeader[32] = (byte)blockAlign; // BlockAlign
wavHeader[33] = 0x00; // BlockAlign
wavHeader[34] = (byte)bitsPerSample; // BitsPerSample
wavHeader[35] = 0x00; // BitsPerSample
wavHeader[36] = 0x64; // d
wavHeader[37] = 0x61; // a
wavHeader[38] = 0x74; // t
wavHeader[39] = 0x61; // a
wavHeader[40] = 0x00; // Placeholder for data chunk size (will be updated later)
wavHeader[41] = 0x00; // Placeholder for data chunk size (will be updated later)
wavHeader[42] = 0x00; // Placeholder for data chunk size (will be updated later)
wavHeader[43] = 0x00; // Placeholder for data chunk size (will be updated later)

writer.Write(wavHeader);
appendWavHeader = false;
}
}
davidvonthenen marked this conversation as resolved.
Show resolved Hide resolved

if (e.Stream != null)
{
using (BinaryWriter writer = new BinaryWriter(File.Open("output.mp3", FileMode.Append)))
using (BinaryWriter writer = new BinaryWriter(File.Open("output.wav", FileMode.Append)))
{
writer.Write(e.Stream.ToArray());
davidvonthenen marked this conversation as resolved.
Show resolved Hide resolved
}
Expand Down Expand Up @@ -75,7 +141,11 @@ static async Task Main(string[] args)
}));

// Start the connection
var speakSchema = new SpeakSchema();
var speakSchema = new SpeakSchema()
{
Encoding = "linear16",
SampleRate = 48000
};
davidvonthenen marked this conversation as resolved.
Show resolved Hide resolved
await speakClient.Connect(speakSchema);

// Send some Text to convert to audio
Expand Down
Loading