NVIDIA · HOYALIM · Jun 17, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/docs/inference/inference-options.mdx b/docs/inference/inference-options.mdx
@@ -74,6 +74,36 @@ The managed install/start vLLM entry appears by default on DGX Spark and DGX Sta
 | Local Ollama | Routes to a local Ollama instance on `localhost:11434`. NemoClaw detects installed models, offers starter models if none are present, pulls and warms the selected model, and validates it. | Selected during onboarding. For more information, refer to [Use a Local Inference Server](use-local-inference). |
 | Model Router | Starts a host-side router on port `4000`, registers it as an OpenAI-compatible provider, and keeps the sandbox pointed at `inference.local`. Set `NEMOCLAW_PROVIDER=routed` for non-interactive setup. | The router pool defines the model names. |
 
+## Model Task-Fit Guide
+
+Use this table as starter guidance when selecting a curated cloud model during onboarding.
+The provider catalog remains authoritative for exact context-window limits, availability, and current pricing.
+The relative labels below are qualitative and compare models within the curated onboarding choices, not across every model a provider offers.
+
+| Model | Best-for task type | Relative latency | Tool-use quality | Context-window fit | Relative cost |
+|---|---|---|---|---|---|
+| `nvidia/nemotron-3-super-120b-a12b` | Default hosted agent work, multi-step planning, and tool-heavy shell workflows | Medium | Strong default for OpenClaw tool loops | Large agent context | Medium |
+| `nvidia/nemotron-3-ultra-550b-a55b` | Quality-sensitive reasoning, careful synthesis, and complex reviews | Higher | Strong for complex tool plans | Large agent context | Higher |
+| `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning` | Reasoning-first and multimodal experiments where a compact hosted model is enough | Medium | Good after the smoke probe confirms final-answer content | Large agent context | Medium |
+| `z-ai/glm-5.1` | General chat, multilingual text work, and fast iteration | Low-to-medium | Good for straightforward tool loops | Large agent context | Low-to-medium |
+| `minimaxai/minimax-m2.7` | Long-form writing, multi-turn assistant work, and broad instruction following | Medium | Good for structured assistant turns | Large agent context | Medium |
+| `moonshotai/kimi-k2.6` | Coding tasks and shell-heavy agent trajectories | Medium | Strong with NemoClaw's Kimi tool-call compatibility path | Large-context friendly | Medium |
+| `openai/gpt-oss-120b` | Hosted open-weight-style experimentation and cost-aware general agents | Medium | Good when provider-side tool calling is enabled | Large agent context | Medium |
+| `deepseek-ai/deepseek-v4-pro` | Code, math, and reasoning-heavy problem solving | Medium-to-high | Strong when the endpoint supports tool calls | Large agent context | Medium-to-high |
+| `gpt-5.4` | Default OpenAI-backed agent work and general high-quality reasoning | Medium | Strong | Large agent context | Medium-to-high |
+| `gpt-5.4-mini` | Latency-sensitive routine automation and repeated helper calls | Low | Good | Medium-to-large context | Low |
+| `gpt-5.4-nano` | Very low-latency classification, routing, extraction, and small helper tasks | Very low | Basic to good for simple tool loops | Medium context | Very low |
+| `gpt-5.4-pro-2026-03-05` | Quality-first complex reasoning where latency and cost are secondary | Highest | Validate Responses API support before relying on long tool loops | Large agent context | Highest |
+| `claude-sonnet-4-6` | Balanced coding, writing, analysis, and multi-step tool work | Medium | Strong | Large agent context | Medium-to-high |
+| `claude-haiku-4-5` | Fast summarization, routing, extraction, and lightweight assistant turns | Low | Good for simple tool loops | Medium-to-large context | Low |
+| `claude-opus-4-6` | Deep analysis, careful writing, and quality-first planning | Higher | Strong | Large agent context | Higher |
+| `gemini-3.1-pro-preview` | Large-context analysis, synthesis, and preview-feature evaluation | Medium-to-high | Good; validate tool continuation for the selected provider path | Very large context | Medium-to-high |
+| `gemini-3.1-flash-lite-preview` | Low-cost extraction, classification, and simple helper calls | Low | Basic to good for simple tool loops | Medium-to-large context | Low |
+| `gemini-3-flash-preview` | Fast general assistant tasks and preview-feature evaluation | Low | Good for simple tool loops | Large context | Low |
+| `gemini-2.5-pro` | Large-context analysis, long-document synthesis, and complex reasoning | Medium-to-high | Good | Very large context | Medium-to-high |
+| `gemini-2.5-flash` | Latency-sensitive general assistant and multimodal tasks | Low | Good for simple tool loops | Large context | Low |
+| `gemini-2.5-flash-lite` | Lowest-cost helper calls, extraction, and classification | Very low | Basic to good for simple tool loops | Medium-to-large context | Very low |
+
 ## Choosing the Right Option for Nemotron
 
 NVIDIA Nemotron models expose OpenAI-compatible APIs across every supported deployment surface, so two onboarding options can route to Nemotron.

diff --git a/test/inference-options-docs.test.ts b/test/inference-options-docs.test.ts
@@ -0,0 +1,106 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import fs from "node:fs";
+import { createRequire } from "node:module";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+import type * as TypeScript from "typescript";
+import { describe, expect, it } from "vitest";
+
+// Build a CommonJS-style `require` anchored at this module's URL so the
+// `typescript` package can be loaded at runtime; the type-only `TypeScript`
+// import supplies the static type for the loaded module.
+const require = createRequire(import.meta.url);
+const ts = require("typescript") as typeof TypeScript;
+// Repository root: the parent of the directory that contains this test file.
+const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
+// Documentation page whose "Model Task-Fit Guide" section is checked below.
+const inferenceOptionsPath = path.join(repoRoot, "docs", "inference", "inference-options.mdx");
+// Source file that is parsed to find the `CLOUD_MODEL_OPTIONS` export.
+const inferenceConfigPath = path.join(repoRoot, "src", "lib", "inference", "config.ts");
+
+/**
+ * Extracts the slice of `markdown` that begins at the first occurrence of
+ * `heading` and stops just before the following occurrence of `nextHeading`.
+ *
+ * Fails the running test (through `expect`) when `heading` is absent or when
+ * `nextHeading` is not found after it.
+ *
+ * @param markdown - Full document text to search.
+ * @param heading - Text that marks the start of the section (included in the result).
+ * @param nextHeading - Text that marks the end of the section (excluded from the result).
+ * @returns The text from `heading` up to, but not including, `nextHeading`.
+ */
+function sectionBetween(markdown: string, heading: string, nextHeading: string): string {
+  const sectionStart = markdown.indexOf(heading);
+  expect(sectionStart).toBeGreaterThanOrEqual(0);
+
+  // Look for the closing marker only past the opening one.
+  const sectionEnd = markdown.indexOf(nextHeading, sectionStart + heading.length);
+  expect(sectionEnd).toBeGreaterThan(sectionStart);
+
+  return markdown.slice(sectionStart, sectionEnd);
+}
+
+/**
+ * Peels every enclosing `as` expression (for example `as const`) off
+ * `expression` and returns the innermost operand. An expression that is not
+ * an `as` expression is returned untouched.
+ */
+function unwrapConstAssertion(expression: TypeScript.Expression): TypeScript.Expression {
+  let current = expression;
+  while (ts.isAsExpression(current)) {
+    current = current.expression;
+  }
+  return current;
+}
+
+/**
+ * Reads the `id` of every entry in the exported `CLOUD_MODEL_OPTIONS` array by
+ * parsing the inference config source with the TypeScript compiler API. The
+ * config module is only parsed, never imported or executed.
+ *
+ * @returns The string-literal `id` values, in declaration order.
+ * @throws Error when no exported `CLOUD_MODEL_OPTIONS` variable exists, when
+ *   its initializer is not an array literal, or when an element is not an
+ *   object literal carrying a string-literal `id` property.
+ */
+function readCuratedCloudModelIds(): string[] {
+  const source = fs.readFileSync(inferenceConfigPath, "utf8");
+  const sourceFile = ts.createSourceFile(
+    inferenceConfigPath,
+    source,
+    ts.ScriptTarget.Latest,
+    true,
+    ts.ScriptKind.TS,
+  );
+
+  for (const statement of sourceFile.statements) {
+    // Narrow before touching `modifiers`: the property is declared on the node
+    // types that can carry modifiers, not on the generic statement type.
+    if (!ts.isVariableStatement(statement)) continue;
+
+    const isExported =
+      statement.modifiers?.some((modifier) => modifier.kind === ts.SyntaxKind.ExportKeyword) ??
+      false;
+    if (!isExported) continue;
+
+    for (const declaration of statement.declarationList.declarations) {
+      if (declaration.name.getText(sourceFile) !== "CLOUD_MODEL_OPTIONS") continue;
+
+      // Accept both `[...]` and `[...] as const`.
+      const initializer = declaration.initializer && unwrapConstAssertion(declaration.initializer);
+      if (!initializer || !ts.isArrayLiteralExpression(initializer)) {
+        throw new Error("CLOUD_MODEL_OPTIONS must be initialized with an array literal");
+      }
+
+      return initializer.elements.map((element, index) => {
+        if (!ts.isObjectLiteralExpression(element)) {
+          throw new Error(`CLOUD_MODEL_OPTIONS[${index}] must be an object literal`);
+        }
+
+        // Take the first `id: "<literal>"` assignment on the element.
+        for (const property of element.properties) {
+          if (!ts.isPropertyAssignment(property)) continue;
+          if (property.name.getText(sourceFile) !== "id") continue;
+          const value = unwrapConstAssertion(property.initializer);
+          if (ts.isStringLiteralLike(value)) return value.text;
+        }
+
+        throw new Error(`CLOUD_MODEL_OPTIONS[${index}] must have a string-literal id property`);
+      });
+    }
+  }
+
+  throw new Error("CLOUD_MODEL_OPTIONS export was not found");
+}
+
+// Guards the "Model Task-Fit Guide" section of the inference options page:
+// the table header, the provider-catalog disclaimer, the absence of
+// placeholder markers, and one table row per expected model ID.
+describe("inference options model task-fit docs (#4755)", () => {
+  it("keeps a per-model task-fit comparison table for curated onboarding models", () => {
+    const guide = sectionBetween(
+      fs.readFileSync(inferenceOptionsPath, "utf8"),
+      "## Model Task-Fit Guide",
+      "## Choosing the Right Option for Nemotron",
+    );
+
+    const tableHeader =
+      "| Model | Best-for task type | Relative latency | Tool-use quality | Context-window fit | Relative cost |";
+    expect(guide).toContain(tableHeader);
+    expect(guide).toContain("provider catalog remains authoritative");
+    // No placeholder markers may be left in the published guide.
+    expect(guide).not.toMatch(/\bTBD\b|\bTODO\b/i);
+
+    // IDs listed literally here, checked in addition to the IDs read from
+    // the CLOUD_MODEL_OPTIONS export.
+    const literalModelIds = [
+      "gpt-5.4",
+      "gpt-5.4-mini",
+      "gpt-5.4-nano",
+      "gpt-5.4-pro-2026-03-05",
+      "claude-sonnet-4-6",
+      "claude-haiku-4-5",
+      "claude-opus-4-6",
+      "gemini-3.1-pro-preview",
+      "gemini-3.1-flash-lite-preview",
+      "gemini-3-flash-preview",
+      "gemini-2.5-pro",
+      "gemini-2.5-flash",
+      "gemini-2.5-flash-lite",
+    ];
+    const expectedModelIds = readCuratedCloudModelIds().concat(literalModelIds);
+
+    // Each model must appear as the first cell of a table row.
+    expectedModelIds.forEach((modelId) => {
+      expect(guide).toContain(`| \`${modelId}\` |`);
+    });
+  });
+});