diff --git a/openapi.yaml b/openapi.yaml index b4abc85..f2177da 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -3,75 +3,141 @@ openapi: 3.1.0 info: title: Inference Gateway API description: | - API for interacting with various language models through the Inference Gateway. + The API for interacting with various language models and other AI services. + OpenAI, Groq, Ollama, and other providers are supported. + OpenAI compatible API for using with existing clients. + Unified API for all providers. + contact: + name: Inference Gateway + url: https://inference-gateway.github.io/docs/ version: 1.0.0 + license: + name: MIT + url: https://github.com/inference-gateway/inference-gateway/blob/main/LICENSE servers: - url: http://localhost:8080 + description: Default server without version prefix for healthcheck and proxy and points + x-server-tags: ["Health", "Proxy"] + - url: http://localhost:8080/v1 + description: Default server with version prefix for listing models and chat completions + x-server-tags: ["Models", "Completions"] + - url: https://api.inference-gateway.local/v1 + description: Local server with version prefix for listing models and chat completions + x-server-tags: ["Models", "Completions"] +tags: + - name: Models + description: List and describe the various models available in the API. + - name: Completions + description: Generate completions from the models. + - name: Proxy + description: Proxy requests to provider endpoints. + - name: Health + description: Health check paths: - /llms: + /models: get: - summary: List all language models operationId: listModels + tags: + - Models + description: | + Lists the currently available models, and provides basic information + about each one such as the owner and availability. + summary: + Lists the currently available models, and provides basic information + about each one such as the owner and availability. security: - bearerAuth: [] - responses: - "200": - description: A list of models by provider - content: - application/json: - schema: - type: array - items: - $ref: "#/components/schemas/ListModelsResponse" - "401": - $ref: "#/components/responses/Unauthorized" - /llms/{provider}: - get: - summary: List all models for a specific provider - operationId: listModelsByProvider parameters: - name: provider - in: path - required: true + in: query + required: false schema: - $ref: "#/components/schemas/Providers" - security: - - bearerAuth: [] + $ref: "#/components/schemas/Provider" + description: Specific provider to query (optional) responses: "200": - description: A list of models + description: List of available models content: application/json: schema: $ref: "#/components/schemas/ListModelsResponse" - "400": - $ref: "#/components/responses/BadRequest" + examples: + allProviders: + summary: Models from all providers + value: + object: "list" + data: + - id: "gpt-4o" + object: "model" + created: 1686935002 + owned_by: "openai" + - id: "llama-3.3-70b-versatile" + object: "model" + created: 1723651281 + owned_by: "groq" + - id: "claude-3-opus-20240229" + object: "model" + created: 1708905600 + owned_by: "anthropic" + - id: "command-r" + object: "model" + created: 1707868800 + owned_by: "cohere" + - id: "phi3:3.8b" + object: "model" + created: 1718441600 + owned_by: "ollama" + singleProvider: + summary: Models from a specific provider + value: + object: "list" + data: + - id: "gpt-4o" + object: "model" + created: 1686935002 + owned_by: "openai" + - id: "gpt-4-turbo" + object: "model" + created: 1687882410 + owned_by: "openai" + - id: "gpt-3.5-turbo" + object: "model" + created: 1677649963 + owned_by: "openai" "401": $ref: "#/components/responses/Unauthorized" - /llms/{provider}/generate: + "500": + $ref: "#/components/responses/InternalError" + /chat/completions: post: - summary: Generate content with a specific provider's LLM - operationId: generateContent + operationId: createChatCompletion + tags: + - Completions + description: | + Generates a chat completion based on the provided input. + The completion can be streamed to the client as it is generated. + summary: Create a chat completion + security: + - bearerAuth: [] parameters: - name: provider - in: path - required: true + in: query + required: false schema: - $ref: "#/components/schemas/Providers" - security: - - bearerAuth: [] + $ref: "#/components/schemas/Provider" + description: Specific provider to use (default determined by model) requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/GenerateRequest" + $ref: "#/components/requestBodies/CreateChatCompletionRequest" responses: "200": - description: Generated content + description: Successful response content: application/json: schema: - $ref: "#/components/schemas/GenerateResponse" + $ref: "#/components/schemas/CreateChatCompletionResponse" + text/event-stream: + schema: + $ref: "#/components/schemas/SSEvent" "400": $ref: "#/components/responses/BadRequest" "401": @@ -84,7 +150,7 @@ paths: in: path required: true schema: - $ref: "#/components/schemas/Providers" + $ref: "#/components/schemas/Provider" - name: path in: path required: true @@ -94,8 +160,14 @@ paths: type: string description: The remaining path to proxy to the provider get: - summary: Proxy GET request to provider operationId: proxyGet + tags: + - Proxy + description: | + Proxy GET request to provider + The request body depends on the specific provider and endpoint being called. + If you decide to use this approach, please follow the provider-specific documentations. + summary: Proxy GET request to provider responses: "200": $ref: "#/components/responses/ProviderResponse" @@ -108,8 +180,14 @@ paths: security: - bearerAuth: [] post: - summary: Proxy POST request to provider operationId: proxyPost + tags: + - Proxy + description: | + Proxy POST request to provider + The request body depends on the specific provider and endpoint being called. + If you decide to use this approach, please follow the provider-specific documentations. + summary: Proxy POST request to provider requestBody: $ref: "#/components/requestBodies/ProviderRequest" responses: @@ -124,8 +202,14 @@ paths: security: - bearerAuth: [] put: - summary: Proxy PUT request to provider operationId: proxyPut + tags: + - Proxy + description: | + Proxy PUT request to provider + The request body depends on the specific provider and endpoint being called. + If you decide to use this approach, please follow the provider-specific documentations. + summary: Proxy PUT request to provider requestBody: $ref: "#/components/requestBodies/ProviderRequest" responses: @@ -140,8 +224,14 @@ paths: security: - bearerAuth: [] delete: - summary: Proxy DELETE request to provider operationId: proxyDelete + tags: + - Proxy + description: | + Proxy DELETE request to provider + The request body depends on the specific provider and endpoint being called. + If you decide to use this approach, please follow the provider-specific documentations. + summary: Proxy DELETE request to provider responses: "200": $ref: "#/components/responses/ProviderResponse" @@ -154,8 +244,14 @@ paths: security: - bearerAuth: [] patch: - summary: Proxy PATCH request to provider operationId: proxyPatch + tags: + - Proxy + description: | + Proxy PATCH request to provider + The request body depends on the specific provider and endpoint being called. + If you decide to use this approach, please follow the provider-specific documentations. + summary: Proxy PATCH request to provider requestBody: $ref: "#/components/requestBodies/ProviderRequest" responses: @@ -171,6 +267,12 @@ paths: - bearerAuth: [] /health: get: + operationId: healthCheck + tags: + - Health + description: | + Health check endpoint + Returns a 200 status code if the service is healthy summary: Health check responses: "200": @@ -200,25 +302,34 @@ components: type: string temperature: type: number - format: float64 + format: float default: 0.7 - examples: - - openai: - summary: OpenAI chat completion request - value: - model: "gpt-3.5-turbo" - messages: - - role: "user" - content: "Hello! How can I assist you today?" - temperature: 0.7 - - anthropic: - summary: Anthropic Claude request - value: - model: "claude-3-opus-20240229" - messages: - - role: "user" - content: "Explain quantum computing" - temperature: 0.5 + examples: + openai: + summary: OpenAI chat completion request + value: + model: "gpt-3.5-turbo" + messages: + - role: "user" + content: "Hello! How can I assist you today?" + temperature: 0.7 + anthropic: + summary: Anthropic Claude request + value: + model: "claude-3-opus-20240229" + messages: + - role: "user" + content: "Explain quantum computing" + temperature: 0.5 + CreateChatCompletionRequest: + required: true + description: | + ProviderRequest depends on the specific provider and endpoint being called + If you decide to use this approach, please follow the provider-specific documentations. + content: + application/json: + schema: + $ref: "#/components/schemas/CreateChatCompletionRequest" responses: BadRequest: description: Bad request @@ -278,7 +389,7 @@ components: To enable authentication, set ENABLE_AUTH to true. When enabled, requests must include a valid JWT token in the Authorization header. schemas: - Providers: + Provider: type: string enum: - ollama @@ -287,36 +398,137 @@ components: - cloudflare - cohere - anthropic + - deepseek + x-provider-configs: + ollama: + id: "ollama" + url: "http://ollama:8080/v1" + auth_type: "none" + endpoints: + models: + name: "list_models" + method: "GET" + endpoint: "/models" + chat: + name: "chat_completions" + method: "POST" + endpoint: "/chat/completions" + anthropic: + id: "anthropic" + url: "https://api.anthropic.com/v1" + auth_type: "bearer" + endpoints: + models: + name: "list_models" + method: "GET" + endpoint: "/models" + chat: + name: "chat_completions" + method: "POST" + endpoint: "/chat/completions" + cohere: + id: "cohere" + url: "https://api.cohere.ai" + auth_type: "bearer" + endpoints: + models: + name: "list_models" + method: "GET" + endpoint: "/v1/models" + chat: + name: "chat_completions" + method: "POST" + endpoint: "/compatibility/v1/chat/completions" + groq: + id: "groq" + url: "https://api.groq.com/openai/v1" + auth_type: "bearer" + endpoints: + models: + name: "list_models" + method: "GET" + endpoint: "/models" + chat: + name: "chat_completions" + method: "POST" + endpoint: "/chat/completions" + openai: + id: "openai" + url: "https://api.openai.com/v1" + auth_type: "bearer" + endpoints: + models: + name: "list_models" + method: "GET" + endpoint: "/models" + chat: + name: "chat_completions" + method: "POST" + endpoint: "/chat/completions" + cloudflare: + id: "cloudflare" + url: "https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai" + auth_type: "bearer" + endpoints: + models: + name: "list_models" + method: "GET" + endpoint: "/finetunes/public?limit=1000" + chat: + name: "chat_completions" + method: "POST" + endpoint: "/v1/chat/completions" + deepseek: + id: "deepseek" + url: "https://api.deepseek.com" + auth_type: "bearer" + endpoints: + models: + name: "list_models" + method: "GET" + endpoint: "/models" + chat: + name: "chat_completions" + method: "POST" + endpoint: "/chat/completions" ProviderSpecificResponse: type: object description: | Provider-specific response format. Examples: - OpenAI GET /v1/models response: + OpenAI GET /v1/models?provider=openai response: ```json { + "provider": "openai", + "object": "list", "data": [ { "id": "gpt-4", "object": "model", - "created": 1687882410 + "created": 1687882410, + "owned_by": "openai", + "served_by": "openai" } ] } ``` - Anthropic GET /v1/models response: + Anthropic GET /v1/models?provider=anthropic response: ```json { - "models": [ + "provider": "anthropic", + "object": "list", + "data": [ { - "name": "claude-3-opus-20240229", - "description": "Most capable model for highly complex tasks" + "id": "gpt-4", + "object": "model", + "created": 1687882410, + "owned_by": "openai", + "served_by": "openai" } ] } ``` - additionalProperties: true ProviderAuthType: type: string description: Authentication type for providers @@ -325,6 +537,31 @@ components: - xheader - query - none + SSEvent: + type: object + properties: + event: + type: string + enum: + - message-start + - stream-start + - content-start + - content-delta + - content-end + - message-end + - stream-end + data: + type: string + format: byte + retry: + type: integer + Endpoints: + type: object + properties: + models: + type: string + chat: + type: string Error: type: object properties: @@ -337,6 +574,7 @@ components: - system - user - assistant + - tool Message: type: object description: Message structure for provider requests @@ -345,72 +583,474 @@ components: $ref: "#/components/schemas/MessageRole" content: type: string + tool_calls: + type: array + items: + $ref: "#/components/schemas/ChatCompletionMessageToolCall" + tool_call_id: + type: string + reasoning: + type: string + reasoning_content: + type: string + required: + - role + - content Model: type: object description: Common model information properties: - name: + id: + type: string + object: type: string + created: + type: integer + format: int64 + owned_by: + type: string + served_by: + $ref: "#/components/schemas/Provider" ListModelsResponse: type: object description: Response structure for listing models properties: provider: - $ref: "#/components/schemas/Providers" - models: + $ref: "#/components/schemas/Provider" + object: + type: string + data: type: array items: $ref: "#/components/schemas/Model" - GenerateRequest: + default: [] + FunctionObject: type: object - description: Request structure for token generation + properties: + description: + type: string + description: + A description of what the function does, used by the model to + choose when and how to call the function. + name: + type: string + description: + The name of the function to be called. Must be a-z, A-Z, 0-9, or + contain underscores and dashes, with a maximum length of 64. + parameters: + $ref: "#/components/schemas/FunctionParameters" + strict: + type: boolean + default: false + description: + Whether to enable strict schema adherence when generating the + function call. If set to true, the model will follow the exact + schema defined in the `parameters` field. Only a subset of JSON + Schema is supported when `strict` is `true`. Learn more about + Structured Outputs in the [function calling + guide](docs/guides/function-calling). required: - - model - - messages + - name + ChatCompletionTool: + type: object + properties: + type: + $ref: "#/components/schemas/ChatCompletionToolType" + function: + $ref: "#/components/schemas/FunctionObject" + required: + - type + - function + FunctionParameters: + type: object + description: >- + The parameters the functions accepts, described as a JSON Schema object. + See the [guide](/docs/guides/function-calling) for examples, and the + [JSON Schema + reference](https://json-schema.org/understanding-json-schema/) for + documentation about the format. + + Omitting `parameters` defines a function with an empty parameter list. + properties: + type: + type: string + description: The type of the parameters. Currently, only `object` is supported. + properties: + type: object + description: The properties of the parameters. + required: + type: array + items: + type: string + description: The required properties of the parameters. + ChatCompletionToolType: + type: string + description: The type of the tool. Currently, only `function` is supported. + enum: + - function + CompletionUsage: + type: object + description: Usage statistics for the completion request. + properties: + completion_tokens: + type: integer + default: 0 + format: int64 + description: Number of tokens in the generated completion. + prompt_tokens: + type: integer + default: 0 + format: int64 + description: Number of tokens in the prompt. + total_tokens: + type: integer + default: 0 + format: int64 + description: Total number of tokens used in the request (prompt + completion). + required: + - prompt_tokens + - completion_tokens + - total_tokens + ChatCompletionStreamOptions: + description: > + Options for streaming response. Only set this when you set `stream: + true`. + type: object + properties: + include_usage: + type: boolean + description: > + If set, an additional chunk will be streamed before the `data: + [DONE]` message. The `usage` field on this chunk shows the token + usage statistics for the entire request, and the `choices` field + will always be an empty array. All other chunks will also include a + `usage` field, but with a null value. + default: true + CreateChatCompletionRequest: + type: object properties: model: type: string + description: Model ID to use messages: + description: > + A list of messages comprising the conversation so far. type: array + minItems: 1 items: $ref: "#/components/schemas/Message" + max_tokens: + description: > + An upper bound for the number of tokens that can be generated + for a completion, including visible output tokens and reasoning tokens. + type: integer stream: + description: > + If set to true, the model response data will be streamed to the + client as it is generated using [server-sent + events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). type: boolean default: false - description: Whether to stream tokens as they are generated in raw json - ssevents: - type: boolean - default: false - description: | - Whether to use Server-Sent Events for token generation. - When enabled, the response will be streamed as SSE with the following event types: - - message-start: Initial message event with assistant role - - stream-start: Stream initialization - - content-start: Content beginning - - content-delta: Content update with new tokens - - content-end: Content completion - - message-end: Message completion - - stream-end: Stream completion + stream_options: + $ref: "#/components/schemas/ChatCompletionStreamOptions" + tools: + type: array + description: > + A list of tools the model may call. Currently, only functions + are supported as a tool. Use this to provide a list of functions + the model may generate JSON inputs for. A max of 128 functions + are supported. + items: + $ref: "#/components/schemas/ChatCompletionTool" + required: + - model + - messages + ChatCompletionMessageToolCallFunction: + type: object + description: The function that the model called. + properties: + name: + type: string + description: The name of the function to call. + arguments: + type: string + description: + The arguments to call the function with, as generated by the model + in JSON format. Note that the model does not always generate + valid JSON, and may hallucinate parameters not defined by your + function schema. Validate the arguments in your code before + calling your function. + required: + - name + - arguments + ChatCompletionMessageToolCall: + type: object + properties: + id: + type: string + description: The ID of the tool call. + type: + $ref: "#/components/schemas/ChatCompletionToolType" + function: + $ref: "#/components/schemas/ChatCompletionMessageToolCallFunction" + required: + - id + - type + - function + ChatCompletionChoice: + type: object + properties: + finish_reason: + type: string + description: > + The reason the model stopped generating tokens. This will be + `stop` if the model hit a natural stop point or a provided + stop sequence, + + `length` if the maximum number of tokens specified in the + request was reached, - **Note:** Depending on the provider, some events may not be present. - ResponseTokens: + `content_filter` if content was omitted due to a flag from our + content filters, + + `tool_calls` if the model called a tool. + enum: + - stop + - length + - tool_calls + - content_filter + - function_call + index: + type: integer + description: The index of the choice in the list of choices. + message: + $ref: "#/components/schemas/Message" + required: + - finish_reason + - index + - message + - logprobs + ChatCompletionStreamChoice: type: object - description: Token response structure + required: + - delta + - finish_reason + - index properties: - role: + delta: + $ref: "#/components/schemas/ChatCompletionStreamResponseDelta" + logprobs: + description: Log probability information for the choice. + type: object + properties: + content: + description: A list of message content tokens with log probability information. + type: array + items: + $ref: "#/components/schemas/ChatCompletionTokenLogprob" + refusal: + description: A list of message refusal tokens with log probability information. + type: array + items: + $ref: "#/components/schemas/ChatCompletionTokenLogprob" + required: + - content + - refusal + finish_reason: + $ref: "#/components/schemas/FinishReason" + index: + type: integer + description: The index of the choice in the list of choices. + CreateChatCompletionResponse: + type: object + description: + Represents a chat completion response returned by model, based on + the provided input. + properties: + id: type: string + description: A unique identifier for the chat completion. + choices: + type: array + description: + A list of chat completion choices. Can be more than one if `n` is + greater than 1. + items: + $ref: "#/components/schemas/ChatCompletionChoice" + created: + type: integer + description: + The Unix timestamp (in seconds) of when the chat completion was + created. model: type: string + description: The model used for the chat completion. + object: + type: string + description: The object type, which is always `chat.completion`. + x-stainless-const: true + usage: + $ref: "#/components/schemas/CompletionUsage" + required: + - choices + - created + - id + - model + - object + ChatCompletionStreamResponseDelta: + type: object + description: A chat completion delta generated by streamed model responses. + properties: content: type: string - GenerateResponse: + description: The contents of the chunk message. + tool_calls: + type: array + items: + $ref: "#/components/schemas/ChatCompletionMessageToolCallChunk" + role: + $ref: "#/components/schemas/MessageRole" + refusal: + type: string + description: The refusal message generated by the model. + ChatCompletionMessageToolCallChunk: type: object - description: Response structure for token generation properties: - provider: + index: + type: integer + id: + type: string + description: The ID of the tool call. + type: + type: string + description: The type of the tool. Currently, only `function` is supported. + function: + type: object + properties: + name: + type: string + description: The name of the function to call. + arguments: + type: string + description: + The arguments to call the function with, as generated by the model + in JSON format. Note that the model does not always generate + valid JSON, and may hallucinate parameters not defined by your + function schema. Validate the arguments in your code before + calling your function. + required: + - index + ChatCompletionTokenLogprob: + type: object + properties: + token: &a1 + description: The token. + type: string + logprob: &a2 + description: + The log probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value `-9999.0` is used to signify + that the token is very unlikely. + type: number + bytes: &a3 + description: + A list of integers representing the UTF-8 bytes representation of + the token. Useful in instances where characters are represented by + multiple tokens and their byte representations must be combined to + generate the correct text representation. Can be `null` if there is + no bytes representation for the token. + type: array + items: + type: integer + top_logprobs: + description: + List of the most likely tokens and their log probability, at this + token position. In rare cases, there may be fewer than the number of + requested `top_logprobs` returned. + type: array + items: + type: object + properties: + token: *a1 + logprob: *a2 + bytes: *a3 + required: + - token + - logprob + - bytes + required: + - token + - logprob + - bytes + - top_logprobs + FinishReason: + type: string + description: > + The reason the model stopped generating tokens. This will be + `stop` if the model hit a natural stop point or a provided + stop sequence, + + `length` if the maximum number of tokens specified in the + request was reached, + + `content_filter` if content was omitted due to a flag from our + content filters, + + `tool_calls` if the model called a tool. + enum: + - stop + - length + - tool_calls + - content_filter + - function_call + CreateChatCompletionStreamResponse: + type: object + description: | + Represents a streamed chunk of a chat completion response returned + by the model, based on the provided input. + properties: + id: type: string - response: - $ref: "#/components/schemas/ResponseTokens" + description: + A unique identifier for the chat completion. Each chunk has the + same ID. + choices: + type: array + description: > + A list of chat completion choices. Can contain more than one + elements if `n` is greater than 1. Can also be empty for the + + last chunk if you set `stream_options: {"include_usage": true}`. + items: + $ref: "#/components/schemas/ChatCompletionStreamChoice" + created: + type: integer + description: + The Unix timestamp (in seconds) of when the chat completion was + created. Each chunk has the same timestamp. + model: + type: string + description: The model to generate the completion. + system_fingerprint: + type: string + description: > + This fingerprint represents the backend configuration that the model + runs with. + + Can be used in conjunction with the `seed` request parameter to + understand when backend changes have been made that might impact + determinism. + object: + type: string + description: The object type, which is always `chat.completion.chunk`. + usage: + $ref: "#/components/schemas/CompletionUsage" + required: + - choices + - created + - id + - model + - object Config: x-config: sections: @@ -526,7 +1166,7 @@ components: - name: anthropic_api_url env: "ANTHROPIC_API_URL" type: string - default: "https://api.anthropic.com" + default: "https://api.anthropic.com/v1" description: "Anthropic API URL" - name: anthropic_api_key env: "ANTHROPIC_API_KEY" @@ -536,7 +1176,7 @@ components: - name: cloudflare_api_url env: "CLOUDFLARE_API_URL" type: string - default: "https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}" + default: "https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai" description: "Cloudflare API URL" - name: cloudflare_api_key env: "CLOUDFLARE_API_KEY" @@ -546,7 +1186,7 @@ components: - name: cohere_api_url env: "COHERE_API_URL" type: string - default: "https://api.cohere.com" + default: "https://api.cohere.ai" description: "Cohere API URL" - name: cohere_api_key env: "COHERE_API_KEY" @@ -556,7 +1196,7 @@ components: - name: groq_api_url env: "GROQ_API_URL" type: string - default: "https://api.groq.com" + default: "https://api.groq.com/openai/v1" description: "Groq API URL" - name: groq_api_key env: "GROQ_API_KEY" @@ -566,7 +1206,7 @@ components: - name: ollama_api_url env: "OLLAMA_API_URL" type: string - default: "http://ollama:8080" + default: "http://ollama:8080/v1" description: "Ollama API URL" - name: ollama_api_key env: "OLLAMA_API_KEY" @@ -576,451 +1216,20 @@ components: - name: openai_api_url env: "OPENAI_API_URL" type: string - default: "https://api.openai.com" + default: "https://api.openai.com/v1" description: "OpenAI API URL" - name: openai_api_key env: "OPENAI_API_KEY" type: string description: "OpenAI API Key" secret: true - x-provider-configs: - ollama: - id: "ollama" - url: "http://ollama:8080" - auth_type: "none" - endpoints: - list: - endpoint: "/api/tags" - method: "GET" - schema: - response: - type: object - properties: - models: - type: array - items: - type: object - properties: - name: - type: string - modified_at: - type: string - size: - type: integer - digest: - type: string - details: - type: object - properties: - format: - type: string - family: - type: string - families: - type: array - items: - type: string - parameter_size: - type: string - generate: - endpoint: "/api/generate" - method: "POST" - schema: - request: - type: object - properties: - model: - type: string - prompt: - type: string - stream: - type: boolean - system: - type: string - temperature: - type: number - format: float64 - default: 0.7 - response: - type: object - properties: - provider: - type: string - response: - type: object - properties: - role: - type: string - model: - type: string - content: - type: string - openai: - id: "openai" - url: "https://api.openai.com" - auth_type: "bearer" - endpoints: - list: - endpoint: "/v1/models" - method: "GET" - schema: - response: - type: object - properties: - object: - type: string - data: - type: array - items: - type: object - properties: - id: - type: string - object: - type: string - created: - type: integer - format: int64 - owned_by: - type: string - permission: - type: array - items: - type: object - properties: - id: - type: string - object: - type: string - created: - type: integer - format: int64 - allow_create_engine: - type: boolean - allow_sampling: - type: boolean - allow_logprobs: - type: boolean - allow_search_indices: - type: boolean - allow_view: - type: boolean - allow_fine_tuning: - type: boolean - root: - type: string - parent: - type: string - generate: - endpoint: "/v1/chat/completions" - method: "POST" - schema: - request: - type: object - properties: - model: - type: string - messages: - type: array - items: - type: object - properties: - role: - type: string - content: - type: string - temperature: - type: number - format: float64 - default: 0.7 - response: - type: object - properties: - model: - type: string - choices: - type: array - items: - type: object - properties: - message: - type: object - properties: - role: - type: string - content: - type: string - groq: - id: "groq" - url: "https://api.groq.com" - auth_type: "bearer" - endpoints: - list: - endpoint: "/openai/v1/models" - method: "GET" - schema: - response: - type: object - properties: - object: - type: string - data: - type: array - items: - type: object - properties: - id: - type: string - object: - type: string - created: - type: integer - format: int64 - owned_by: - type: string - active: - type: boolean - context_window: - type: integer - public_apps: - type: object - generate: - endpoint: "/openai/v1/chat/completions" - method: "POST" - schema: - request: - type: object - properties: - model: - type: string - messages: - type: array - items: - type: object - properties: - role: - type: string - content: - type: string - temperature: - type: number - format: float64 - default: 0.7 - response: - type: object - properties: - model: - type: string - choices: - type: array - items: - type: object - properties: - message: - type: object - properties: - role: - type: string - content: - type: string - cloudflare: - id: "cloudflare" - url: "https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}" - auth_type: "bearer" - endpoints: - list: - endpoint: "/ai/finetunes/public" - method: "GET" - schema: - response: - type: object - properties: - result: - type: array - items: - type: object - properties: - id: - type: string - name: - type: string - description: - type: string - created_at: - type: string - modified_at: - type: string - public: - type: integer - model: - type: string - generate: - endpoint: "/v1/chat/completions" - method: "POST" - schema: - request: - type: object - properties: - prompt: - type: string - model: - type: string - temperature: - type: number - format: float64 - default: 0.7 - response: - type: object - properties: - result: - type: object - properties: - response: - type: string - cohere: - id: "cohere" - url: "https://api.cohere.com" - auth_type: "bearer" - endpoints: - list: - endpoint: "/v1/models" - method: "GET" - schema: - response: - type: object - properties: - models: - type: array - items: - type: object - properties: - name: - type: string - endpoints: - type: array - items: - type: string - finetuned: - type: boolean - context_length: - type: number - format: float64 - tokenizer_url: - type: string - default_endpoints: - type: array - items: - type: string - next_page_token: - type: string - generate: - endpoint: "/v2/chat" - method: "POST" - schema: - request: - type: object - properties: - model: - type: string - messages: - type: array - items: - type: object - properties: - role: - type: string - content: - type: string - temperature: - type: number - format: float64 - default: 0.7 - response: - type: object - properties: - message: - type: object - properties: - role: - type: string - content: - type: array - items: - type: object - properties: - type: - type: string - text: - type: string - anthropic: - id: "anthropic" - url: "https://api.anthropic.com" - auth_type: "xheader" - extra_headers: - anthropic-version: "2023-06-01" - endpoints: - list: - endpoint: "/v1/models" - method: "GET" - schema: - response: - type: object - properties: - models: - type: array - items: - type: object - properties: - type: - type: string - id: - type: string - display_name: - type: string - created_at: - type: string - has_more: - type: boolean - first_id: - type: string - last_id: - type: string - generate: - endpoint: "/v1/messages" - method: "POST" - schema: - request: - type: object - properties: - model: - type: string - messages: - type: array - items: - type: object - properties: - role: - type: string - content: - type: string - temperature: - type: number - format: float64 - default: 0.7 - response: - type: object - properties: - model: - type: string - choices: - type: array - items: - type: object - properties: - message: - type: object - properties: - role: - type: string - content: - type: string + - name: deepseek_api_url + env: "DEEPSEEK_API_URL" + type: string + default: "https://api.deepseek.com" + description: "DeepSeek API URL" + - name: deepseek_api_key + env: "DEEPSEEK_API_KEY" + type: string + description: "DeepSeek API Key" + secret: true