[8.x] [inference] add support for `auto` function calling mode (#208144) #208193

Merged
1 commit merged on Jan 24, 2025
@@ -102,7 +102,7 @@ export type ChatCompleteOptions<
*/
modelName?: string;
/**
* Function calling mode, defaults to "native".
* Function calling mode, defaults to "auto".
*/
functionCalling?: FunctionCallingMode;
/**
@@ -152,7 +152,8 @@ export interface ChatCompleteResponse<TToolOptions extends ToolOptions = ToolOpt

/**
* Define the function calling mode when using inference APIs.
* - native will use the LLM's native function calling (requires the LLM to have native support)
* - simulated: will emulate function calling with function calling instructions
* - "native": will use the LLM's native function calling (requires the LLM to have native support)
* - "simulated": will emulate function calling with function calling instructions
* - "auto": will use "native" for providers we know are supporting native function call, "simulated" otherwise
*/
export type FunctionCallingMode = 'native' | 'simulated';
export type FunctionCallingMode = 'native' | 'simulated' | 'auto';
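For illustration, a minimal sketch of how a caller might opt into the new mode. The client shape, `connectorId` value, and tool definition below are illustrative assumptions rather than part of this change, and `MessageRole` is assumed to be exported from `@kbn/inference-common`:

```ts
import { MessageRole } from '@kbn/inference-common';

// Hypothetical consumer: `client` stands in for the inference plugin's
// chat complete client, obtained elsewhere (e.g. from the plugin's start contract).
async function askWithAutoFunctionCalling(client: {
  chatComplete: (options: Record<string, unknown>) => Promise<unknown>;
}) {
  return client.chatComplete({
    connectorId: 'my-connector-id', // illustrative connector id
    // 'auto' (now the default) resolves to native function calling when the
    // connector supports it, and to simulated function calling otherwise.
    functionCalling: 'auto',
    messages: [{ role: MessageRole.User, content: 'What is the weather in Paris?' }],
    tools: {
      get_weather: { description: 'Get the weather for a location' },
    },
  });
}
```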
@@ -93,7 +93,7 @@ export interface OutputOptions<
*/
modelName?: string;
/**
* Function calling mode, defaults to "native".
* Function calling mode, defaults to "auto".
*/
functionCalling?: FunctionCallingMode;
/**
@@ -0,0 +1,16 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

export const isNativeFunctionCallingSupportedMock = jest.fn();

jest.doMock('../../utils/function_calling_support', () => {
const actual = jest.requireActual('../../utils/function_calling_support');
return {
...actual,
isNativeFunctionCallingSupported: isNativeFunctionCallingSupportedMock,
};
});
@@ -5,13 +5,14 @@
* 2.0.
*/

import { isNativeFunctionCallingSupportedMock } from './inference_adapter.test.mocks';
import OpenAI from 'openai';
import { v4 } from 'uuid';
import { PassThrough } from 'stream';
import { lastValueFrom, Subject, toArray, filter } from 'rxjs';
import type { Logger } from '@kbn/logging';
import { loggerMock } from '@kbn/logging-mocks';
import {
ToolChoiceType,
ChatCompletionEventType,
MessageRole,
isChatCompletionChunkEvent,
@@ -48,21 +49,23 @@ function createOpenAIChunk({

describe('inferenceAdapter', () => {
const executorMock = {
getConnector: jest.fn(),
invoke: jest.fn(),
} as InferenceExecutor & { invoke: jest.MockedFn<InferenceExecutor['invoke']> };
} as InferenceExecutor & {
invoke: jest.MockedFn<InferenceExecutor['invoke']>;
getConnector: jest.MockedFn<InferenceExecutor['getConnector']>;
};

const logger = {
debug: jest.fn(),
error: jest.fn(),
} as unknown as Logger;
const logger = loggerMock.create();

beforeEach(() => {
executorMock.invoke.mockReset();
isNativeFunctionCallingSupportedMock.mockReset().mockReturnValue(true);
});

const defaultArgs = {
executor: executorMock,
logger: loggerMock.create(),
logger,
};

describe('when creating the request', () => {
@@ -232,6 +235,25 @@ describe('inferenceAdapter', () => {
]);
});

it('propagates the temperature parameter', () => {
inferenceAdapter.chatComplete({
logger,
executor: executorMock,
messages: [{ role: MessageRole.User, content: 'question' }],
temperature: 0.4,
});

expect(executorMock.invoke).toHaveBeenCalledTimes(1);
expect(executorMock.invoke).toHaveBeenCalledWith({
subAction: 'unified_completion_stream',
subActionParams: expect.objectContaining({
body: expect.objectContaining({
temperature: 0.4,
}),
}),
});
});

it('propagates the abort signal when provided', () => {
const abortController = new AbortController();

@@ -251,20 +273,26 @@
});
});

it('propagates the temperature parameter', () => {
it('uses the right value for functionCalling=auto', () => {
isNativeFunctionCallingSupportedMock.mockReturnValue(false);

inferenceAdapter.chatComplete({
logger,
executor: executorMock,
messages: [{ role: MessageRole.User, content: 'question' }],
temperature: 0.4,
tools: {
foo: { description: 'my tool' },
},
toolChoice: ToolChoiceType.auto,
functionCalling: 'auto',
});

expect(executorMock.invoke).toHaveBeenCalledTimes(1);
expect(executorMock.invoke).toHaveBeenCalledWith({
subAction: 'unified_completion_stream',
subActionParams: expect.objectContaining({
body: expect.objectContaining({
temperature: 0.4,
body: expect.not.objectContaining({
tools: expect.any(Array),
}),
}),
});
@@ -15,6 +15,7 @@ import {
parseInlineFunctionCalls,
wrapWithSimulatedFunctionCalling,
} from '../../simulated_function_calling';
import { isNativeFunctionCallingSupported } from '../../utils/function_calling_support';
import {
toolsToOpenAI,
toolChoiceToOpenAI,
@@ -30,16 +31,19 @@ export const inferenceAdapter: InferenceConnectorAdapter = {
messages,
toolChoice,
tools,
functionCalling,
functionCalling = 'auto',
temperature = 0,
modelName,
logger,
abortSignal,
}) => {
const simulatedFunctionCalling = functionCalling === 'simulated';
const useSimulatedFunctionCalling =
functionCalling === 'auto'
? !isNativeFunctionCallingSupported(executor.getConnector())
: functionCalling === 'simulated';

let request: Omit<OpenAI.ChatCompletionCreateParams, 'model'> & { model?: string };
if (simulatedFunctionCalling) {
if (useSimulatedFunctionCalling) {
const wrapped = wrapWithSimulatedFunctionCalling({
system,
messages,
@@ -87,7 +91,7 @@ export const inferenceAdapter: InferenceConnectorAdapter = {
}),
processOpenAIStream(),
emitTokenCountEstimateIfMissing({ request }),
simulatedFunctionCalling ? parseInlineFunctionCalls({ logger }) : identity
useSimulatedFunctionCalling ? parseInlineFunctionCalls({ logger }) : identity
);
},
};
@@ -0,0 +1,16 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

export const isNativeFunctionCallingSupportedMock = jest.fn();

jest.doMock('../../utils/function_calling_support', () => {
const actual = jest.requireActual('../../utils/function_calling_support');
return {
...actual,
isNativeFunctionCallingSupported: isNativeFunctionCallingSupportedMock,
};
});
@@ -5,14 +5,15 @@
* 2.0.
*/

import { isNativeFunctionCallingSupportedMock } from './openai_adapter.test.mocks';
import OpenAI from 'openai';
import { v4 } from 'uuid';
import { PassThrough } from 'stream';
import { pick } from 'lodash';
import { lastValueFrom, Subject, toArray, filter } from 'rxjs';
import type { Logger } from '@kbn/logging';
import { loggerMock } from '@kbn/logging-mocks';
import {
ToolChoiceType,
ChatCompletionEventType,
isChatCompletionChunkEvent,
MessageRole,
@@ -48,21 +49,23 @@ function createOpenAIChunk({

describe('openAIAdapter', () => {
const executorMock = {
getConnector: jest.fn(),
invoke: jest.fn(),
} as InferenceExecutor & { invoke: jest.MockedFn<InferenceExecutor['invoke']> };
} as InferenceExecutor & {
invoke: jest.MockedFn<InferenceExecutor['invoke']>;
getConnector: jest.MockedFn<InferenceExecutor['getConnector']>;
};

const logger = {
debug: jest.fn(),
error: jest.fn(),
} as unknown as Logger;
const logger = loggerMock.create();

beforeEach(() => {
executorMock.invoke.mockReset();
isNativeFunctionCallingSupportedMock.mockReset().mockReturnValue(true);
});

const defaultArgs = {
executor: executorMock,
logger: loggerMock.create(),
logger,
};

describe('when creating the request', () => {
@@ -359,6 +362,24 @@ describe('openAIAdapter', () => {
});
});

it('uses the right value for functionCalling=auto', () => {
isNativeFunctionCallingSupportedMock.mockReturnValue(false);

openAIAdapter.chatComplete({
logger,
executor: executorMock,
messages: [{ role: MessageRole.User, content: 'question' }],
tools: {
foo: { description: 'my tool' },
},
toolChoice: ToolChoiceType.auto,
functionCalling: 'auto',
});

expect(executorMock.invoke).toHaveBeenCalledTimes(1);
expect(getRequest().body.tools).toBeUndefined();
});

it('propagates the temperature parameter', () => {
openAIAdapter.chatComplete({
logger,
@@ -14,6 +14,7 @@ import {
parseInlineFunctionCalls,
wrapWithSimulatedFunctionCalling,
} from '../../simulated_function_calling';
import { isNativeFunctionCallingSupported } from '../../utils/function_calling_support';
import type { OpenAIRequest } from './types';
import { messagesToOpenAI, toolsToOpenAI, toolChoiceToOpenAI } from './to_openai';
import { processOpenAIStream } from './process_openai_stream';
@@ -27,15 +28,18 @@ export const openAIAdapter: InferenceConnectorAdapter = {
toolChoice,
tools,
temperature = 0,
functionCalling,
functionCalling = 'auto',
modelName,
logger,
abortSignal,
}) => {
const simulatedFunctionCalling = functionCalling === 'simulated';
const useSimulatedFunctionCalling =
functionCalling === 'auto'
? !isNativeFunctionCallingSupported(executor.getConnector())
: functionCalling === 'simulated';

let request: OpenAIRequest;
if (simulatedFunctionCalling) {
if (useSimulatedFunctionCalling) {
const wrapped = wrapWithSimulatedFunctionCalling({
system,
messages,
@@ -86,7 +90,7 @@ export const openAIAdapter: InferenceConnectorAdapter = {
}),
processOpenAIStream(),
emitTokenCountEstimateIfMissing({ request }),
simulatedFunctionCalling ? parseInlineFunctionCalls({ logger }) : identity
useSimulatedFunctionCalling ? parseInlineFunctionCalls({ logger }) : identity
);
},
};
@@ -8,3 +8,11 @@
import type OpenAI from 'openai';

export type OpenAIRequest = Omit<OpenAI.ChatCompletionCreateParams, 'model'> & { model?: string };

// duplicated from x-pack/platform/plugins/shared/stack_connectors/common/openai/constants.ts
// because depending on stack_connectors from the inference plugin creates a cyclic dependency...
export enum OpenAiProviderType {
OpenAi = 'OpenAI',
AzureAi = 'Azure OpenAI',
Other = 'Other',
}
@@ -0,0 +1,64 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { OpenAiProviderType } from '../adapters/openai/types';
import { InferenceConnector, InferenceConnectorType } from '@kbn/inference-common';
import { isNativeFunctionCallingSupported } from './function_calling_support';

const createConnector = (
parts: Partial<InferenceConnector> & Pick<InferenceConnector, 'type'>
): InferenceConnector => {
return {
connectorId: 'connector-id',
name: 'my connector',
config: {},
...parts,
};
};

describe('isNativeFunctionCallingSupported', () => {
it('returns true for gemini connector', () => {
const connector = createConnector({ type: InferenceConnectorType.Gemini });
expect(isNativeFunctionCallingSupported(connector)).toBe(true);
});

it('returns true for bedrock connector', () => {
const connector = createConnector({ type: InferenceConnectorType.Bedrock });
expect(isNativeFunctionCallingSupported(connector)).toBe(true);
});

it('returns true for inference connector', () => {
const connector = createConnector({ type: InferenceConnectorType.Inference });
expect(isNativeFunctionCallingSupported(connector)).toBe(true);
});

describe('openAI connector', () => {
it('returns true for "OpenAI" provider', () => {
const connector = createConnector({
type: InferenceConnectorType.OpenAI,
config: { apiProvider: OpenAiProviderType.OpenAi },
});
expect(isNativeFunctionCallingSupported(connector)).toBe(true);
});

it('returns true for "Azure" provider', () => {
const connector = createConnector({
type: InferenceConnectorType.OpenAI,
config: { apiProvider: OpenAiProviderType.AzureAi },
});
expect(isNativeFunctionCallingSupported(connector)).toBe(true);
});

it('returns false for "Other" provider', () => {
const connector = createConnector({
type: InferenceConnectorType.OpenAI,
config: { apiProvider: OpenAiProviderType.Other },
});
expect(isNativeFunctionCallingSupported(connector)).toBe(false);
});
});
});
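The implementation of `isNativeFunctionCallingSupported` is not included in this excerpt; the following is a minimal sketch that would satisfy the assertions above. Treating an OpenAI connector with no `apiProvider` in its config as native-capable is an assumption not covered by these tests:

```ts
import { InferenceConnector, InferenceConnectorType } from '@kbn/inference-common';
import { OpenAiProviderType } from '../adapters/openai/types';

// Sketch consistent with the tests above: every connector type is assumed to
// support native function calling, except OpenAI connectors configured with
// the "Other" (OpenAI-compatible) provider.
export const isNativeFunctionCallingSupported = (connector: InferenceConnector): boolean => {
  if (connector.type === InferenceConnectorType.OpenAI) {
    const apiProvider = connector.config?.apiProvider as OpenAiProviderType | undefined;
    return apiProvider !== OpenAiProviderType.Other;
  }
  // Gemini, Bedrock, and the Inference connector all have native support.
  return true;
};
```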