Merged
102 changes: 102 additions & 0 deletions docs/providers/baseten.md
@@ -0,0 +1,102 @@
---
title: "Baseten"
description: "Learn how to configure and use Baseten's Model APIs with Roo Code. Access frontier open-source models with enterprise-grade performance, reliability, and competitive pricing."
---

Baseten provides on-demand frontier model APIs designed for production applications, not just experimentation. Built on the Baseten Inference Stack, these APIs deliver optimized inference for leading open-source models from OpenAI, DeepSeek, Moonshot AI, and Alibaba Cloud.

**Website:** [https://www.baseten.co/products/model-apis/](https://www.baseten.co/products/model-apis/)

## Getting an API Key

1. **Sign Up/Sign In:** Go to [Baseten](https://www.baseten.co/) and create an account or sign in.

2. **Navigate to API Keys:** Access your dashboard and go to the API Keys section at [https://app.baseten.co/settings/api_keys](https://app.baseten.co/settings/api_keys).

3. **Create a Key:** Generate a new API key. Give it a descriptive name (e.g., "Roo Code").

4. **Copy the Key:** Copy the API key immediately and store it securely.

## Configuration in Roo Code

1. **Open Roo Code Settings:** Click the settings icon (⚙️) in the Roo Code panel or open VS Code settings.

2. **Select Provider:** Choose "Baseten" from the "API Provider" dropdown.

3. **Enter API Key:** Paste your Baseten API key into the "Baseten API Key" field.

4. **Select Model:** Choose your desired model from the "Model" dropdown.

**IMPORTANT: For Kimi K2 Thinking:** To use the `moonshotai/Kimi-K2-Thinking` model, you must enable **Native Tool Call (Experimental)** in Roo Code settings. This setting lets Roo Code call tools through the model's native tool-calling interface and is required for this reasoning model to function properly.

## Supported Models

Roo Code supports all current models available through Baseten Model APIs. For the most up-to-date pricing, visit the [Baseten Model APIs page](https://www.baseten.co/products/model-apis/).

Supported models include:

- `moonshotai/Kimi-K2-Thinking` (Moonshot AI) - Enhanced reasoning capabilities with step-by-step thought processes (262K context) - $0.60/$2.50 per 1M tokens

- `zai-org/GLM-4.6` (Z AI) - Frontier open model with advanced agentic, reasoning, and coding capabilities (200K context) - $0.60/$2.20 per 1M tokens

- `moonshotai/Kimi-K2-Instruct-0905` (Moonshot AI) - September update with enhanced capabilities (262K context) - $0.60/$2.50 per 1M tokens

- `openai/gpt-oss-120b` (OpenAI) - 120B MoE with strong reasoning capabilities (128K context) - $0.10/$0.50 per 1M tokens

- `Qwen/Qwen3-Coder-480B-A35B-Instruct` - Advanced coding and reasoning (262K context) - $0.38/$1.53 per 1M tokens

- `Qwen/Qwen3-235B-A22B-Instruct-2507` - Math and reasoning expert (262K context) - $0.22/$0.80 per 1M tokens

- `deepseek-ai/DeepSeek-R1` - DeepSeek's first-generation reasoning model (163K context) - $2.55/$5.95 per 1M tokens

- `deepseek-ai/DeepSeek-R1-0528` - Latest revision of DeepSeek's reasoning model (163K context) - $2.55/$5.95 per 1M tokens

- `deepseek-ai/DeepSeek-V3.1` - Hybrid reasoning with advanced tool calling (163K context) - $0.50/$1.50 per 1M tokens

- `deepseek-ai/DeepSeek-V3-0324` - Fast general-purpose with enhanced reasoning (163K context) - $0.77/$0.77 per 1M tokens

## Production-First Architecture

Baseten's Model APIs are built for production environments with several key advantages:

### Enterprise-Grade Reliability

- **Four nines of uptime** (99.99%) through active-active redundancy
- **Cloud-agnostic, multi-cluster autoscaling** for consistent availability
- **SOC 2 Type II certified** and **HIPAA compliant** for security requirements

### Optimized Performance

- **Pre-optimized models** shipped with the Baseten Inference Stack
- **Latest-generation GPUs** with multi-cloud infrastructure
- **Ultra-fast inference** optimized from the bottom up for production workloads

### Cost Efficiency

- **5-10x less expensive** than closed alternatives
- **Optimized multi-cloud infrastructure** for efficient resource utilization
- **Transparent pricing** with no hidden costs or rate limit surprises

### Developer Experience

- **OpenAI compatible API** - migrate by swapping a single URL
- **Drop-in replacement** for closed models with comprehensive observability and analytics
- **Seamless scaling** from Model APIs to dedicated deployments
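Because the API is OpenAI-compatible, requests use the standard chat-completions shape against Baseten's base URL. A sketch of building such a request (the base URL below is an assumption — confirm the exact endpoint in Baseten's documentation):

```typescript
// Assumed OpenAI-compatible base URL for Baseten Model APIs; verify in Baseten's docs.
const BASETEN_BASE_URL = "https://inference.baseten.co/v1"

interface ChatMessage {
	role: "system" | "user" | "assistant"
	content: string
}

// Build the URL and JSON body for a chat-completions request.
function buildChatRequest(model: string, messages: ChatMessage[]) {
	return {
		url: `${BASETEN_BASE_URL}/chat/completions`,
		body: { model, messages, stream: false },
	}
}

const req = buildChatRequest("moonshotai/Kimi-K2-Thinking", [{ role: "user", content: "Hello" }])
console.log(req.url)
```

Sending it is then a single `fetch` (or OpenAI SDK call) with your Baseten API key as a bearer token.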

## Special Features

### Function Calling & Tool Use

All Baseten models support structured outputs, function calling, and tool use as part of the Baseten Inference Stack, making them ideal for agentic applications and coding workflows.
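Tool definitions follow the standard OpenAI function-calling schema. A minimal sketch of one tool entry (the tool name and parameters here are purely illustrative, not part of Roo Code's actual tool set):

```typescript
// An OpenAI-style tool definition, as accepted by OpenAI-compatible
// chat-completions endpoints. Name and parameters are illustrative.
const tools = [
	{
		type: "function",
		function: {
			name: "read_file",
			description: "Read a file from the workspace",
			parameters: {
				type: "object",
				properties: { path: { type: "string" } },
				required: ["path"],
			},
		},
	},
]
console.log(tools[0].function.name)
```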

## Tips and Notes

- **Static Model List:** Roo Code uses a curated list of Baseten models. The default model is `moonshotai/Kimi-K2-Thinking`.

- **Multi-Cloud Capacity Management (MCM):** Baseten's multi-cloud infrastructure ensures high availability and low latency globally.

- **Support:** Baseten provides dedicated support for production deployments and can work with you on dedicated resources as you scale.

## Pricing Information

Current pricing is highly competitive and transparent. For the most up-to-date pricing, visit the [Baseten Model APIs page](https://www.baseten.co/products/model-apis/). Prices typically range from $0.10-$6.00 per million tokens, making Baseten significantly more cost-effective than many closed-model alternatives while providing access to state-of-the-art open-source models.
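Since pricing is quoted per million tokens, estimating a request's cost is simple arithmetic. A sketch using two prices from the table above (USD per 1M tokens; check the pricing page for current values):

```typescript
// Per-million-token prices mirroring the model list above (USD).
const pricing: Record<string, { input: number; output: number }> = {
	"moonshotai/Kimi-K2-Thinking": { input: 0.6, output: 2.5 },
	"openai/gpt-oss-120b": { input: 0.1, output: 0.5 },
}

// Estimate the cost of one request from its token counts.
function estimateCostUsd(model: string, inputTokens: number, outputTokens: number): number {
	const p = pricing[model]
	if (!p) throw new Error(`Unknown model: ${model}`)
	return (inputTokens * p.input + outputTokens * p.output) / 1_000_000
}

// A 10K-input / 2K-output request on gpt-oss-120b:
const cost = estimateCostUsd("openai/gpt-oss-120b", 10_000, 2_000)
console.log(cost) // → 0.002
```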
1 change: 1 addition & 0 deletions packages/types/src/global-settings.ts
@@ -246,6 +246,7 @@ export const SECRET_STATE_KEYS = [
"featherlessApiKey",
"ioIntelligenceApiKey",
"vercelAiGatewayApiKey",
"basetenApiKey",
] as const

// Global secrets that are part of GlobalSettings (not ProviderSettings)
10 changes: 10 additions & 0 deletions packages/types/src/provider-settings.ts
@@ -4,6 +4,7 @@ import { modelInfoSchema, reasoningEffortSettingSchema, verbosityLevelsSchema, s
import { codebaseIndexProviderSchema } from "./codebase-index.js"
import {
anthropicModels,
basetenModels,
bedrockModels,
cerebrasModels,
claudeCodeModels,
@@ -120,6 +121,7 @@ export const providerNames = [
...fauxProviders,
"anthropic",
"bedrock",
"baseten",
"cerebras",
"claude-code",
"doubao",
@@ -424,6 +426,10 @@ const vercelAiGatewaySchema = baseProviderSettingsSchema.extend({
vercelAiGatewayModelId: z.string().optional(),
})

const basetenSchema = apiModelIdProviderModelSchema.extend({
basetenApiKey: z.string().optional(),
})

const defaultSchema = z.object({
apiProvider: z.undefined(),
})
@@ -454,6 +460,7 @@ export const providerSettingsSchemaDiscriminated = z.discriminatedUnion("apiProv
fakeAiSchema.merge(z.object({ apiProvider: z.literal("fake-ai") })),
xaiSchema.merge(z.object({ apiProvider: z.literal("xai") })),
groqSchema.merge(z.object({ apiProvider: z.literal("groq") })),
basetenSchema.merge(z.object({ apiProvider: z.literal("baseten") })),
huggingFaceSchema.merge(z.object({ apiProvider: z.literal("huggingface") })),
chutesSchema.merge(z.object({ apiProvider: z.literal("chutes") })),
litellmSchema.merge(z.object({ apiProvider: z.literal("litellm") })),
@@ -496,6 +503,7 @@ export const providerSettingsSchema = z.object({
...fakeAiSchema.shape,
...xaiSchema.shape,
...groqSchema.shape,
...basetenSchema.shape,
...huggingFaceSchema.shape,
...chutesSchema.shape,
...litellmSchema.shape,
@@ -583,6 +591,7 @@ export const modelIdKeysByProvider: Record<TypicalProvider, ModelIdKey> = {
requesty: "requestyModelId",
xai: "apiModelId",
groq: "apiModelId",
baseten: "apiModelId",
chutes: "apiModelId",
litellm: "litellmModelId",
huggingface: "huggingFaceModelId",
@@ -715,6 +724,7 @@ export const MODELS_BY_PROVIDER: Record<
},
xai: { id: "xai", label: "xAI (Grok)", models: Object.keys(xaiModels) },
zai: { id: "zai", label: "Zai", models: Object.keys(internationalZAiModels) },
baseten: { id: "baseten", label: "Baseten", models: Object.keys(basetenModels) },

// Dynamic providers; models pulled from remote APIs.
glama: { id: "glama", label: "Glama", models: [] },
127 changes: 127 additions & 0 deletions packages/types/src/providers/baseten.ts
@@ -0,0 +1,127 @@
import type { ModelInfo } from "../model.js"

// Baseten
// https://baseten.co/products/model-apis/

// Extended ModelInfo to include supportedFeatures, like tools
export interface BasetenModelInfo extends ModelInfo {
supportedFeatures?: string[]
}

export const basetenModels = {
"moonshotai/Kimi-K2-Thinking": {
maxTokens: 163_800,
contextWindow: 262_000,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.6,
outputPrice: 2.5,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "Kimi K2 Thinking - A model with enhanced reasoning capabilities from Kimi K2",
},
"zai-org/GLM-4.6": {
maxTokens: 200_000,
contextWindow: 200_000,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.6,
outputPrice: 2.2,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "Frontier open model with advanced agentic, reasoning and coding capabilities",
},
"deepseek-ai/DeepSeek-R1": {
maxTokens: 131_072,
contextWindow: 163_840,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 2.55,
outputPrice: 5.95,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "DeepSeek's first-generation reasoning model",
},
"deepseek-ai/DeepSeek-R1-0528": {
maxTokens: 131_072,
contextWindow: 163_840,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 2.55,
outputPrice: 5.95,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "The latest revision of DeepSeek's first-generation reasoning model",
},
"deepseek-ai/DeepSeek-V3-0324": {
maxTokens: 131_072,
contextWindow: 163_840,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.77,
outputPrice: 0.77,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "Fast general-purpose LLM with enhanced reasoning capabilities",
},
"deepseek-ai/DeepSeek-V3.1": {
maxTokens: 131_072,
contextWindow: 163_840,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.5,
outputPrice: 1.5,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description:
"Extremely capable general-purpose LLM with hybrid reasoning capabilities and advanced tool calling",
},
"Qwen/Qwen3-235B-A22B-Instruct-2507": {
maxTokens: 262_144,
contextWindow: 262_144,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.22,
outputPrice: 0.8,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "Mixture-of-experts LLM with math and reasoning capabilities",
},
"Qwen/Qwen3-Coder-480B-A35B-Instruct": {
maxTokens: 262_144,
contextWindow: 262_144,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.38,
outputPrice: 1.53,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "Mixture-of-experts LLM with advanced coding and reasoning capabilities",
},
"openai/gpt-oss-120b": {
maxTokens: 128_072,
contextWindow: 128_072,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.1,
outputPrice: 0.5,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "Extremely capable general-purpose LLM with strong, controllable reasoning capabilities",
},
"moonshotai/Kimi-K2-Instruct-0905": {
maxTokens: 168_000,
contextWindow: 262_000,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.6,
outputPrice: 2.5,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
		description: "State-of-the-art language model for agentic and coding tasks. September update.",
},
} as const satisfies Record<string, ModelInfo>

export type BasetenModelId = keyof typeof basetenModels

export const basetenDefaultModelId = "moonshotai/Kimi-K2-Thinking" satisfies BasetenModelId
4 changes: 4 additions & 0 deletions packages/types/src/providers/index.ts
@@ -1,4 +1,5 @@
export * from "./anthropic.js"
export * from "./baseten.js"
export * from "./bedrock.js"
export * from "./cerebras.js"
export * from "./chutes.js"
@@ -33,6 +34,7 @@ export * from "./deepinfra.js"
export * from "./minimax.js"

import { anthropicDefaultModelId } from "./anthropic.js"
import { basetenDefaultModelId } from "./baseten.js"
import { bedrockDefaultModelId } from "./bedrock.js"
import { cerebrasDefaultModelId } from "./cerebras.js"
import { chutesDefaultModelId } from "./chutes.js"
@@ -93,6 +95,8 @@ export function getProviderDefaultModelId(
return "meta-llama/Llama-3.3-70B-Instruct"
case "chutes":
return chutesDefaultModelId
case "baseten":
return basetenDefaultModelId
case "bedrock":
return bedrockDefaultModelId
case "vertex":
3 changes: 3 additions & 0 deletions src/api/index.ts
@@ -42,6 +42,7 @@ import {
VercelAiGatewayHandler,
DeepInfraHandler,
MiniMaxHandler,
BasetenHandler,
} from "./providers"
import { NativeOllamaHandler } from "./providers/native-ollama"

@@ -190,6 +191,8 @@ export function buildApiHandler(configuration: ProviderSettings): ApiHandler {
return new VercelAiGatewayHandler(options)
case "minimax":
return new MiniMaxHandler(options)
case "baseten":
return new BasetenHandler(options)
default:
apiProvider satisfies "gemini-cli" | undefined
return new AnthropicHandler(options)