Skip to content

Commit 1f7e87c

Browse files
committed
feat: update Cerebras inference provider to support dynamic model listing
- update Cerebras to use OpenAIMixin
- enable openai completions tests
- enable openai chat completions tests
- disable tests with n > 1
- add recording for --setup cerebras --subdirs inference --pattern openai

test with: `./scripts/integration-tests.sh --stack-config server:ci-tests --setup cerebras --subdirs inference --pattern openai`
1 parent 521865c commit 1f7e87c

File tree

16 files changed

+3369
-14
lines changed

16 files changed

+3369
-14
lines changed

docs/source/providers/inference/remote_cerebras.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
99
| Field | Type | Required | Default | Description |
1010
|-------|------|----------|---------|-------------|
1111
| `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
12-
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Cerebras API Key |
12+
| `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Cerebras API Key |
1313

1414
## Sample Configuration
1515

llama_stack/providers/remote/inference/cerebras/cerebras.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# the root directory of this source tree.
66

77
from collections.abc import AsyncGenerator
8+
from urllib.parse import urljoin
89

910
from cerebras.cloud.sdk import AsyncCerebras
1011

@@ -35,14 +36,13 @@
3536
ModelRegistryHelper,
3637
)
3738
from llama_stack.providers.utils.inference.openai_compat import (
38-
OpenAIChatCompletionToLlamaStackMixin,
39-
OpenAICompletionToLlamaStackMixin,
4039
get_sampling_options,
4140
process_chat_completion_response,
4241
process_chat_completion_stream_response,
4342
process_completion_response,
4443
process_completion_stream_response,
4544
)
45+
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
4646
from llama_stack.providers.utils.inference.prompt_adapter import (
4747
chat_completion_request_to_prompt,
4848
completion_request_to_prompt,
@@ -53,10 +53,9 @@
5353

5454

5555
class CerebrasInferenceAdapter(
56+
OpenAIMixin,
5657
ModelRegistryHelper,
5758
Inference,
58-
OpenAIChatCompletionToLlamaStackMixin,
59-
OpenAICompletionToLlamaStackMixin,
6059
):
6160
def __init__(self, config: CerebrasImplConfig) -> None:
6261
ModelRegistryHelper.__init__(
@@ -66,11 +65,17 @@ def __init__(self, config: CerebrasImplConfig) -> None:
6665
self.config = config
6766

6867
# TODO: make this use provider data, etc. like other providers
69-
self.client = AsyncCerebras(
68+
self._cerebras_client = AsyncCerebras(
7069
base_url=self.config.base_url,
7170
api_key=self.config.api_key.get_secret_value(),
7271
)
7372

73+
def get_api_key(self) -> str:
74+
return self.config.api_key.get_secret_value()
75+
76+
def get_base_url(self) -> str:
77+
return urljoin(self.config.base_url, "v1")
78+
7479
async def initialize(self) -> None:
7580
return
7681

@@ -107,14 +112,14 @@ async def completion(
107112
async def _nonstream_completion(self, request: CompletionRequest) -> CompletionResponse:
108113
params = await self._get_params(request)
109114

110-
r = await self.client.completions.create(**params)
115+
r = await self._cerebras_client.completions.create(**params)
111116

112117
return process_completion_response(r)
113118

114119
async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
115120
params = await self._get_params(request)
116121

117-
stream = await self.client.completions.create(**params)
122+
stream = await self._cerebras_client.completions.create(**params)
118123

119124
async for chunk in process_completion_stream_response(stream):
120125
yield chunk
@@ -156,14 +161,14 @@ async def chat_completion(
156161
async def _nonstream_chat_completion(self, request: CompletionRequest) -> CompletionResponse:
157162
params = await self._get_params(request)
158163

159-
r = await self.client.completions.create(**params)
164+
r = await self._cerebras_client.completions.create(**params)
160165

161166
return process_chat_completion_response(r, request)
162167

163168
async def _stream_chat_completion(self, request: CompletionRequest) -> AsyncGenerator:
164169
params = await self._get_params(request)
165170

166-
stream = await self.client.completions.create(**params)
171+
stream = await self._cerebras_client.completions.create(**params)
167172

168173
async for chunk in process_chat_completion_stream_response(stream, request):
169174
yield chunk

llama_stack/providers/remote/inference/cerebras/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ class CerebrasImplConfig(BaseModel):
2020
default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL),
2121
description="Base URL for the Cerebras API",
2222
)
23-
api_key: SecretStr | None = Field(
24-
default=os.environ.get("CEREBRAS_API_KEY"),
23+
api_key: SecretStr = Field(
24+
default=SecretStr(os.environ.get("CEREBRAS_API_KEY")),
2525
description="Cerebras API Key",
2626
)
2727

tests/integration/inference/test_openai_completion.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
4040
"inline::sentence-transformers",
4141
"inline::vllm",
4242
"remote::bedrock",
43-
"remote::cerebras",
4443
"remote::databricks",
4544
# Technically Nvidia does support OpenAI completions, but none of their hosted models
4645
# support both completions and chat completions endpoint and all the Llama models are
@@ -98,6 +97,8 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
9897
# the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'}
9998
"remote::tgi", # TGI ignores n param silently
10099
"remote::together", # `n` > 1 is not supported when streaming tokens. Please disable `stream`
100+
# Error code 400 - {'message': '"n" > 1 is not currently supported', 'type': 'invalid_request_error', 'param': 'n', 'code': 'wrong_api_format'}
101+
"remote::cerebras",
101102
):
102103
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
103104

@@ -109,7 +110,6 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
109110
"inline::sentence-transformers",
110111
"inline::vllm",
111112
"remote::bedrock",
112-
"remote::cerebras",
113113
"remote::databricks",
114114
"remote::runpod",
115115
"remote::watsonx", # watsonx returns 404 when hitting the /openai/v1 endpoint
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
{
2+
"request": {
3+
"method": "POST",
4+
"url": "https://api.cerebras.ai/v1/v1/completions",
5+
"headers": {},
6+
"body": {
7+
"model": "llama-3.3-70b",
8+
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
9+
"stream": false,
10+
"extra_body": {}
11+
},
12+
"endpoint": "/v1/completions",
13+
"model": "llama-3.3-70b"
14+
},
15+
"response": {
16+
"body": {
17+
"__type__": "openai.types.completion.Completion",
18+
"__data__": {
19+
"id": "chatcmpl-6438a448-bbbd-4da1-af88-19390676b0e9",
20+
"choices": [
21+
{
22+
"finish_reason": "stop",
23+
"index": 0,
24+
"logprobs": null,
25+
"text": " blue, sugar is white, but my heart is ________________________.\nA) black\nB) pink\nC) blank\nD) broken\nMy answer is D) broken. This is because the traditional romantic poem has a positive tone until it comes to the heart, which represents the speaker's emotional state. The word \"broken\" shows that the speaker is hurting, which adds a element of sadness to the poem. This is a typical way to express sorrow or longing in poetry.\nThe best answer is D.<|eot_id|>"
26+
}
27+
],
28+
"created": 1758191351,
29+
"model": "llama-3.3-70b",
30+
"object": "text_completion",
31+
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
32+
"usage": {
33+
"completion_tokens": 105,
34+
"prompt_tokens": 26,
35+
"total_tokens": 131,
36+
"completion_tokens_details": null,
37+
"prompt_tokens_details": {
38+
"audio_tokens": null,
39+
"cached_tokens": 0
40+
}
41+
},
42+
"time_info": {
43+
"queue_time": 0.00016155,
44+
"prompt_time": 0.001595551,
45+
"completion_time": 0.107480394,
46+
"total_time": 0.11038637161254883,
47+
"created": 1758191351
48+
}
49+
}
50+
},
51+
"is_streaming": false
52+
}
53+
}
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
{
2+
"request": {
3+
"method": "POST",
4+
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
5+
"headers": {},
6+
"body": {
7+
"model": "llama-3.3-70b",
8+
"messages": [
9+
{
10+
"role": "user",
11+
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
12+
}
13+
],
14+
"stream": true,
15+
"tools": [
16+
{
17+
"type": "function",
18+
"function": {
19+
"name": "get_weather",
20+
"description": "Get the weather in a given city",
21+
"parameters": {
22+
"type": "object",
23+
"properties": {
24+
"city": {
25+
"type": "string",
26+
"description": "The city to get the weather for"
27+
}
28+
}
29+
}
30+
}
31+
}
32+
]
33+
},
34+
"endpoint": "/v1/chat/completions",
35+
"model": "llama-3.3-70b"
36+
},
37+
"response": {
38+
"body": [
39+
{
40+
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
41+
"__data__": {
42+
"id": "chatcmpl-8b6a9499-1a5f-46dc-96b7-3d2b71eecd99",
43+
"choices": [
44+
{
45+
"delta": {
46+
"content": null,
47+
"function_call": null,
48+
"refusal": null,
49+
"role": "assistant",
50+
"tool_calls": null
51+
},
52+
"finish_reason": null,
53+
"index": 0,
54+
"logprobs": null
55+
}
56+
],
57+
"created": 1758191362,
58+
"model": "llama-3.3-70b",
59+
"object": "chat.completion.chunk",
60+
"service_tier": null,
61+
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
62+
"usage": null
63+
}
64+
},
65+
{
66+
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
67+
"__data__": {
68+
"id": "chatcmpl-8b6a9499-1a5f-46dc-96b7-3d2b71eecd99",
69+
"choices": [
70+
{
71+
"delta": {
72+
"content": null,
73+
"function_call": null,
74+
"refusal": null,
75+
"role": null,
76+
"tool_calls": [
77+
{
78+
"index": 0,
79+
"id": "439c86fe5",
80+
"function": {
81+
"arguments": "{\"city\": \"Tokyo\"}",
82+
"name": "get_weather"
83+
},
84+
"type": "function"
85+
}
86+
]
87+
},
88+
"finish_reason": null,
89+
"index": 0,
90+
"logprobs": null
91+
}
92+
],
93+
"created": 1758191362,
94+
"model": "llama-3.3-70b",
95+
"object": "chat.completion.chunk",
96+
"service_tier": null,
97+
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
98+
"usage": null
99+
}
100+
},
101+
{
102+
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
103+
"__data__": {
104+
"id": "chatcmpl-8b6a9499-1a5f-46dc-96b7-3d2b71eecd99",
105+
"choices": [
106+
{
107+
"delta": {
108+
"content": null,
109+
"function_call": null,
110+
"refusal": null,
111+
"role": null,
112+
"tool_calls": null
113+
},
114+
"finish_reason": "tool_calls",
115+
"index": 0,
116+
"logprobs": null
117+
}
118+
],
119+
"created": 1758191362,
120+
"model": "llama-3.3-70b",
121+
"object": "chat.completion.chunk",
122+
"service_tier": null,
123+
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
124+
"usage": {
125+
"completion_tokens": 12,
126+
"prompt_tokens": 248,
127+
"total_tokens": 260,
128+
"completion_tokens_details": null,
129+
"prompt_tokens_details": {
130+
"audio_tokens": null,
131+
"cached_tokens": 0
132+
}
133+
},
134+
"time_info": {
135+
"queue_time": 0.00016941,
136+
"prompt_time": 0.007276727,
137+
"completion_time": 0.00388514,
138+
"total_time": 0.013146162033081055,
139+
"created": 1758191362
140+
}
141+
}
142+
}
143+
],
144+
"is_streaming": true
145+
}
146+
}

0 commit comments

Comments (0)