From 68608615db85db3a0633d17a1f28c619e1c91be6 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Fri, 25 Apr 2025 23:24:06 +0000 Subject: [PATCH 1/2] feat: Add onReasoning callback handling in streaming chat completions Signed-off-by: Eden Reich --- src/client.ts | 7 +++++ tests/client.test.ts | 62 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/src/client.ts b/src/client.ts index 0396a83..650a9a1 100644 --- a/src/client.ts +++ b/src/client.ts @@ -12,6 +12,7 @@ import { ChatCompletionToolType } from './types/generated'; interface ChatCompletionStreamCallbacks { onOpen?: () => void; onChunk?: (chunk: SchemaCreateChatCompletionStreamResponse) => void; + onReasoning?: (reasoningContent: string) => void; onContent?: (content: string) => void; onTool?: (toolCall: SchemaChatCompletionMessageToolCall) => void; onFinish?: ( @@ -257,6 +258,12 @@ export class InferenceGatewayClient { JSON.parse(data); callbacks.onChunk?.(chunk); + const reasoning_content = + chunk.choices[0]?.delta?.reasoning_content; + if (reasoning_content !== undefined) { + callbacks.onReasoning?.(reasoning_content); + } + const content = chunk.choices[0]?.delta?.content; if (content) { callbacks.onContent?.(content); diff --git a/tests/client.test.ts b/tests/client.test.ts index c0b35e4..58d4fd4 100644 --- a/tests/client.test.ts +++ b/tests/client.test.ts @@ -263,6 +263,68 @@ describe('InferenceGatewayClient', () => { ); }); + it('should handle streaming chat completions reasoning and content', async () => { + const mockRequest = { + model: 'gpt-4o', + messages: [ + { role: MessageRole.user, content: 'Hello' }, + ], + stream: true, + }; + const mockStream = new TransformStream(); + const writer = mockStream.writable.getWriter(); + const encoder = new TextEncoder(); + mockFetch.mockResolvedValueOnce({ + ok: true, + body: mockStream.readable, + }); + const callbacks = { + onOpen: jest.fn(), + onChunk: jest.fn(), + onReasoning: jest.fn(), + onContent:
jest.fn(), + onFinish: jest.fn(), + }; + const streamPromise = client.streamChatCompletion(mockRequest, callbacks); + await writer.write( + encoder.encode( + 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' + + 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"","reasoning_content":"This"},"finish_reason":null}]}\n\n' + + 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"","reasoning_content":" is"},"finish_reason":null}]}\n\n' + + 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"","reasoning_content":" a"},"finish_reason":"stop"}]}\n\n' + + 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"","reasoning_content":" reasoning"},"finish_reason":"stop"}]}\n\n' + + 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"","reasoning_content":" content"},"finish_reason":"stop"}]}\n\n' + + 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' + + 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' + + 'data: [DONE]\n\n' + ) + ); + await writer.close(); + await streamPromise; + expect(callbacks.onOpen).toHaveBeenCalledTimes(1); + expect(callbacks.onChunk).toHaveBeenCalledTimes(8); + expect(callbacks.onReasoning).toHaveBeenCalledTimes(5); + 
expect(callbacks.onReasoning).toHaveBeenCalledWith('This'); + expect(callbacks.onReasoning).toHaveBeenCalledWith(' is'); + expect(callbacks.onReasoning).toHaveBeenCalledWith(' a'); + expect(callbacks.onReasoning).toHaveBeenCalledWith(' reasoning'); + expect(callbacks.onReasoning).toHaveBeenCalledWith(' content'); + expect(callbacks.onContent).toHaveBeenCalledTimes(2); + expect(callbacks.onContent).toHaveBeenCalledWith('Hello'); + expect(callbacks.onContent).toHaveBeenCalledWith('!'); + expect(callbacks.onFinish).toHaveBeenCalledTimes(1); + expect(mockFetch).toHaveBeenCalledWith( + 'http://localhost:8080/v1/chat/completions', + expect.objectContaining({ + method: 'POST', + body: JSON.stringify({ + ...mockRequest, + stream: true, + }), + }) + ); + }); + it('should handle tool calls in streaming chat completions', async () => { const mockRequest = { model: 'gpt-4o', From 2bbe7f94c07c88c4a3e8aee17ef29a264501e8f1 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Fri, 25 Apr 2025 23:30:23 +0000 Subject: [PATCH 2/2] docs: Update readme with the new callback Signed-off-by: Eden Reich --- README.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1f63bab..ebe5f15 100644 --- a/README.md +++ b/README.md @@ -155,11 +155,11 @@ const client = new InferenceGatewayClient({ try { await client.streamChatCompletion( { - model: 'gpt-4o', + model: 'openai/gpt-4o', messages: [ { role: MessageRole.User, - content: 'What's the weather in San Francisco?', + content: "What's the weather in San Francisco?", }, ], tools: [ @@ -186,10 +186,14 @@ try { console.log('Tool call:', toolCall.function.name); console.log('Arguments:', toolCall.function.arguments); }, - onContent: (content) => process.stdout.write(content), + onReasoning: (reasoning) => { + console.log('Reasoning:', reasoning); + }, + onContent: (content) => { + console.log('Content:', content); + }, onFinish: () => console.log('\nStream completed'), - }, - Provider.OpenAI + } 
); } catch (error) { console.error('Error:', error);