diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index 6c11c5f8a9..9522b7471d 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -742,6 +742,55 @@ jobs:
           if-no-files-found: ignore
           retention-days: 14
 
+  hermes-inference-switch-vitest:
+    needs: generate-matrix
+    if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',hermes-inference-switch-vitest,') || contains(format(',{0},', inputs.scenarios), ',hermes-inference-switch,') }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 55 # job budget sits above the test's own 45-minute Vitest timeout
+    env:
+      FREE_STANDING_VITEST_JOB: "1"
+      FREE_STANDING_SCENARIO_ID: "hermes-inference-switch"
+      E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/hermes-inference-switch
+      NEMOCLAW_CLI_BIN: ${{ github.workspace }}/bin/nemoclaw.js
+      NEMOCLAW_RUN_E2E_SCENARIOS: "1"
+      NEMOCLAW_NON_INTERACTIVE: "1"
+      NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
+      NEMOCLAW_AGENT: "hermes"
+      NEMOCLAW_SANDBOX_NAME: "e2e-hermes-inference-switch"
+      OPENSHELL_GATEWAY: "nemoclaw"
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+        with:
+          persist-credentials: false
+      - name: Set up Node
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
+        with:
+          node-version: 22
+          cache: npm
+      - name: Install root dependencies
+        run: npm ci --ignore-scripts
+      - name: Build CLI
+        run: npm run build:cli
+      - name: Install OpenShell CLI
+        run: bash scripts/install-openshell.sh
+      - name: Run Hermes inference switch live Vitest test
+        env:
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+        run: |
+          set -euo pipefail
+          export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"
+          export OPENSHELL_BIN="$(command -v openshell || true)"
+          npx vitest run --project e2e-scenarios-live test/e2e-scenario/live/hermes-inference-switch.test.ts --silent=false --reporter=default
+      - name: Upload Hermes inference switch artifacts
+        if: always() # upload whatever was captured even when the test step fails
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: e2e-vitest-scenarios-hermes-inference-switch
+          path: e2e-artifacts/vitest/hermes-inference-switch/
+          include-hidden-files: false
+          if-no-files-found: ignore
+          retention-days: 14
+
   brave-search-vitest:
     needs: generate-matrix
     if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',brave-search-vitest,') || contains(format(',{0},', inputs.scenarios), ',brave-search,') }}
@@ -3966,7 +4015,9 @@ jobs:
         openclaw-skill-cli-vitest,
         inference-routing-vitest,
         cloud-inference-vitest,
+        hermes-inference-switch-vitest,
         brave-search-vitest,
+
         ollama-auth-proxy-vitest,
 
         cron-preflight-inference-local-vitest,
diff --git a/test/e2e-scenario/live/hermes-inference-switch-helpers.ts b/test/e2e-scenario/live/hermes-inference-switch-helpers.ts
new file mode 100644
index 0000000000..1c8e7272f7
--- /dev/null
+++ b/test/e2e-scenario/live/hermes-inference-switch-helpers.ts
@@ -0,0 +1,396 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import fs from "node:fs";
+import http, { type Server } from "node:http";
+import type { AddressInfo } from "node:net";
+import os from "node:os";
+import path from "node:path";
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import type { HostCliClient } from "../fixtures/clients/host.ts";
+import {
+  type SandboxClient,
+  trustedSandboxShellScript,
+  validateSandboxName,
+} from "../fixtures/clients/sandbox.ts";
+import { expect } from "../fixtures/e2e-test.ts";
+import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
+import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
+
+export const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); // three levels above this file's directory
+export const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js"); // CLI entry point run with `node`
+export const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-hermes-inference-switch";
+validateSandboxName(SANDBOX_NAME); // checked once, when this module is first imported
+export const SWITCH_PROVIDER = process.env.NEMOCLAW_SWITCH_PROVIDER ?? "nvidia-prod"; // target of `inference set --provider`
+export const SWITCH_MODEL = process.env.NEMOCLAW_SWITCH_MODEL ?? "z-ai/glm-5.1"; // target of `inference set --model`
+export const SWITCH_API = process.env.NEMOCLAW_SWITCH_INFERENCE_API ?? "openai-completions"; // selects expected base_url/api_mode
+const SWITCH_MOCK_ANTHROPIC = process.env.NEMOCLAW_SWITCH_MOCK_ANTHROPIC ?? "0"; // "1" starts the local mock provider
+const SWITCH_MOCK_PORT = Number.parseInt(process.env.NEMOCLAW_SWITCH_MOCK_PORT ?? "0", 10); // default 0: OS picks a free port
+const INSTALL_ATTEMPTS = process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true" ? 3 : 1; // install attempt budget
+
+interface MockAnthropicProvider { // handle returned by startMockAnthropicProvider
+  endpointUrl: string; // base URL handed to the provider as ANTHROPIC_BASE_URL
+  close(): Promise<void>; // closes the underlying HTTP server
+}
+
+export function env(apiKey?: string, extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv { // env for scenario child commands
+  const out: NodeJS.ProcessEnv = {
+    ...buildAvailabilityProbeEnv(), // base variables; the fixed Hermes settings below override them
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
+    NEMOCLAW_AGENT: "hermes",
+    NEMOCLAW_NON_INTERACTIVE: "1",
+    NEMOCLAW_RECREATE_SANDBOX: "1",
+    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
+    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
+  };
+  apiKey && Object.assign(out, { NVIDIA_INFERENCE_API_KEY: apiKey, NVIDIA_API_KEY: apiKey }); // only for a non-empty key
+  return { ...out, ...extra }; // entries in `extra` win over everything above
+}
+
+export async function bestEffort(run: () => Promise<unknown>): Promise<void> { // runs `run`, never rejects
+  try {
+    await run();
+  } catch {} // deliberate: any error or rejection from `run` is discarded
+}
+
+export function parseHermesModelBlock(text: string): Record<string, string> { // flat view of the top-level `model:` block
+  const model: Record<string, string> = {};
+  let inModel = false;
+  for (const line of text.split(/\r?\n/u)) {
+    const entersModel = /^model:\s*$/u.test(line); // a bare `model:` at column 0 opens the block
+    entersModel && (inModel = true);
+    if (entersModel) continue;
+    if (inModel && /^[A-Za-z0-9_-]+:/u.test(line)) break; // the next column-0 key ends the block
+    const match = inModel ? line.match(/^\s+([A-Za-z0-9_-]+):\s*(.*?)\s*$/u) : null; // indented `key: value`
+    match && (model[match[1]] = match[2].replace(/^['"]|['"]$/gu, "")); // strips one leading/trailing quote character
+  }
+  return model; // empty object when no `model:` block was seen
+}
+
+export function chatContent(raw: string): string { // first non-blank reply text in a chat response body
+  const parsed = JSON.parse(raw) as { // throws SyntaxError when `raw` is not valid JSON
+    choices?: Array<{ message?: Record<string, unknown> }>;
+    content?: Array<{ text?: unknown }>;
+  };
+  const anthropicText = parsed.content?.find((part) => typeof part.text === "string")?.text; // top-level `content[].text` shape
+  const message = parsed.choices?.[0]?.message ?? {}; // `choices[0].message` shape
+  const values = [anthropicText, message.content, message.reasoning_content, message.reasoning]; // priority order
+  return (
+    values
+      .find((value): value is string => typeof value === "string" && value.trim().length > 0)
+      ?.trim() ?? "" // "" when no candidate holds non-blank text
+  );
+}
+
+export async function cleanupHermesSwitch( // tears down the scenario sandbox; never throws
+  host: HostCliClient,
+  sandbox: SandboxClient,
+): Promise<void> {
+  await bestEffort(() => // step 1: `nemoclaw <sandbox> destroy --yes` through the CLI
+    host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
+      artifactName: "cleanup-nemoclaw-destroy",
+      env: env(),
+      timeoutMs: 120_000,
+    }),
+  );
+  await bestEffort(() => // step 2: `openshell sandbox delete`, attempted whether or not step 1 succeeded
+    sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
+      artifactName: "cleanup-openshell-delete",
+      env: env(),
+      timeoutMs: 60_000,
+    }),
+  );
+}
+
+function jsonResponse(res: http.ServerResponse, status: number, payload: unknown): void { // sends `payload` as a JSON body
+  const body = JSON.stringify(payload);
+  res.writeHead(status, {
+    "content-type": "application/json",
+    "content-length": Buffer.byteLength(body), // byte length, not string length
+  });
+  res.end(body);
+}
+
+function sseResponse(res: http.ServerResponse, events: Array<[string, unknown]>): void { // writes all events, then ends
+  res.writeHead(200, { "content-type": "text/event-stream", "cache-control": "no-cache" });
+  for (const [name, payload] of events) {
+    res.write(`event: ${name}\n`);
+    res.write(`data: ${JSON.stringify(payload)}\n\n`); // the blank line terminates one event
+  }
+  res.end();
+}
+
+function closeServer(server: Server): Promise<void> { // promise wrapper around server.close()
+  return new Promise((resolve, reject) => {
+    server.close((error) => (error ? reject(error) : resolve())); // rejects with the error passed to the close callback
+  });
+}
+
+/** Starts the in-process mock of the Anthropic Messages API that always answers "PONG". */
+async function startMockAnthropicProvider(): Promise<MockAnthropicProvider> {
+  const modelPaths = ["/v1/models", "/v1/models/mock-anthropic-model"];
+  const server = http.createServer((req, res) => {
+    const url = new URL(req.url ?? "/", "http://mock.local");
+    if (req.method === "GET" && url.pathname === "/health")
+      return jsonResponse(res, 200, { ok: true });
+    if (req.method === "GET" && modelPaths.includes(url.pathname)) {
+      return jsonResponse(res, 200, { data: [{ id: "mock-anthropic-model" }] });
+    }
+    if (req.method !== "POST" || url.pathname !== "/v1/messages") {
+      return jsonResponse(res, 404, { error: "not found", path: url.pathname });
+    }
+    let raw = "";
+    req.setEncoding("utf8");
+    req.on("data", (chunk) => {
+      raw += chunk;
+    });
+    req.on("end", () => {
+      let payload: { model?: unknown; stream?: unknown };
+      try {
+        payload = JSON.parse(raw || "{}") ?? {}; // a literal `null` body is treated like `{}`
+      } catch {
+        // A throw inside this event handler would be an uncaught exception in the test process.
+        return jsonResponse(res, 400, { error: "invalid JSON body" });
+      }
+      const model = typeof payload.model === "string" ? payload.model : "mock-anthropic-model";
+      const message = { id: "msg_mock", type: "message", role: "assistant", model }; // shared by both reply shapes
+      if (payload.stream === true) {
+        return sseResponse(res, [
+          [
+            "message_start",
+            {
+              type: "message_start",
+              message: {
+                ...message,
+                content: [],
+                stop_reason: null,
+                stop_sequence: null,
+                usage: { input_tokens: 1, output_tokens: 0 },
+              },
+            },
+          ],
+          [
+            "content_block_start",
+            { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } },
+          ],
+          [
+            "content_block_delta",
+            { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "PONG" } },
+          ],
+          ["content_block_stop", { type: "content_block_stop", index: 0 }],
+          [
+            "message_delta",
+            {
+              type: "message_delta",
+              delta: { stop_reason: "end_turn", stop_sequence: null },
+              usage: { output_tokens: 1 },
+            },
+          ],
+          ["message_stop", { type: "message_stop" }],
+        ]);
+      }
+      return jsonResponse(res, 200, {
+        ...message,
+        content: [{ type: "text", text: "PONG" }],
+        stop_reason: "end_turn",
+        usage: { input_tokens: 1, output_tokens: 1 },
+      });
+    });
+  });
+  await new Promise<void>((resolve, reject) => {
+    server.once("error", reject); // surfaces bind failures (e.g. port in use) as a rejection
+    server.listen(SWITCH_MOCK_PORT, "0.0.0.0", () => { // all interfaces; presumably so the sandbox can reach it (confirm)
+      server.off("error", reject);
+      resolve();
+    });
+  });
+  const address = server.address();
+  if (!address || typeof address === "string") {
+    await closeServer(server);
+    throw new Error("mock Anthropic provider did not expose a TCP port");
+  }
+  return {
+    endpointUrl: `http://host.openshell.internal:${(address as AddressInfo).port}`,
+    close: () => closeServer(server),
+  };
+}
+
+export async function ensureCompatibleAnthropicSwitchProvider(
+  host: HostCliClient,
+  cleanup: { add(name: string, run: () => Promise<void> | void): void },
+): Promise<void> { // creates or updates the openshell provider; no-op for any other provider/API pair
+  if (SWITCH_PROVIDER !== "compatible-anthropic-endpoint" || SWITCH_API !== "anthropic-messages")
+    return;
+  const mock = SWITCH_MOCK_ANTHROPIC === "1" ? await startMockAnthropicProvider() : undefined;
+  mock && cleanup.add("close compatible Anthropic switch mock", () => mock.close()); // registered before any expect can throw
+  const endpointUrl = process.env.NEMOCLAW_SWITCH_ENDPOINT_URL || mock?.endpointUrl || ""; // `||`: an empty env value counts as unset
+  const compatibleKey = process.env.COMPATIBLE_ANTHROPIC_API_KEY ?? "test-compatible-anthropic-key";
+  expect(
+    endpointUrl,
+    "NEMOCLAW_SWITCH_ENDPOINT_URL is required for compatible Anthropic inference switches",
+  ).not.toBe("");
+  expect(
+    compatibleKey,
+    "COMPATIBLE_ANTHROPIC_API_KEY is required for compatible Anthropic inference switches",
+  ).not.toBe("");
+  const providerScript = [
+    "set -euo pipefail",
+    "if openshell provider get -g nemoclaw compatible-anthropic-endpoint >/dev/null 2>&1; then", // already registered: update it
+    '  openshell provider update -g nemoclaw compatible-anthropic-endpoint --credential COMPATIBLE_ANTHROPIC_API_KEY --config "ANTHROPIC_BASE_URL=${SWITCH_ENDPOINT_URL}"',
+    "else",
+    '  openshell provider create -g nemoclaw --name compatible-anthropic-endpoint --type anthropic --credential COMPATIBLE_ANTHROPIC_API_KEY --config "ANTHROPIC_BASE_URL=${SWITCH_ENDPOINT_URL}"',
+    "fi",
+  ].join("\n");
+  const result = await host.command("bash", ["-lc", providerScript], {
+    artifactName: "register-compatible-anthropic-switch-provider",
+    env: env(undefined, { // the script reads both values from its environment rather than from inlined text
+      COMPATIBLE_ANTHROPIC_API_KEY: compatibleKey,
+      SWITCH_ENDPOINT_URL: endpointUrl,
+    }),
+    redactionValues: [compatibleKey],
+    timeoutMs: 120_000,
+  });
+  expect(result.exitCode).toBe(0);
+}
+
+/**
+ * Runs install.sh non-interactively and returns the result of the last attempt made. A failed
+ * attempt is retried only when it is classified as transient and attempts remain.
+ */
+export async function installHermes(
+  host: HostCliClient,
+  apiKey: string,
+): Promise<ShellProbeResult> {
+  for (let attempt = 1; ; attempt += 1) {
+    const install = await host.command(
+      "bash",
+      ["install.sh", "--non-interactive", "--yes-i-accept-third-party-software"],
+      {
+        artifactName: attempt === 1 ? "install-hermes" : `install-hermes-attempt-${attempt}`,
+        cwd: REPO_ROOT,
+        env: env(apiKey),
+        redactionValues: [apiKey],
+        timeoutMs: 25 * 60_000,
+      },
+    );
+    const retryable =
+      install.exitCode !== 0 &&
+      isTransientProviderValidationFailure(install) &&
+      attempt < INSTALL_ATTEMPTS;
+    if (!retryable) return install; // success, non-transient failure, or attempts exhausted
+    await new Promise((resolve) => setTimeout(resolve, 10_000 * attempt)); // linear backoff: 10s, 20s, ...
+  }
+}
+
+/** Probes the sandbox for the PID of the first process matching "hermes" and "gateway run". */
+export async function hermesGatewayPid(
+  sandbox: SandboxClient,
+  artifactName: string,
+): Promise<ShellProbeResult> {
+  // [h]/[r] bracket regexes cannot match awk's own argument text, so the probe never self-matches.
+  const script = trustedSandboxShellScript(
+    "ps -eo pid=,comm=,args= | awk '$0 ~ /[h]ermes/ && $0 ~ /gateway [r]un/ { print $1; exit }'",
+  );
+  const options = { artifactName, env: env(), timeoutMs: 30_000 };
+  return await sandbox.execShell(SANDBOX_NAME, script, options); // stdout is empty when nothing matched
+}
+
+export async function envHash( // runs sha256sum on the sandbox's Hermes .env file
+  sandbox: SandboxClient,
+  artifactName: string,
+): Promise<ShellProbeResult> {
+  return await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
+    artifactName,
+    env: env(),
+    timeoutMs: 30_000,
+  });
+}
+
+/** Compares gateway PIDs; skipped only when `before` is empty, an empty `after` is asserted. */
+export function maybeAssertPidStable(
+  before: ShellProbeResult,
+  after: ShellProbeResult,
+  assertStable: (a: string, b: string) => void,
+): void {
+  const beforePid = before.stdout.trim();
+  if (beforePid) assertStable(after.stdout.trim(), beforePid); // a vanished PID must not pass silently
+}
+
+export function expectedBaseUrl(): string {
+  // The anthropic-messages API expects the bare host; every other API expects the /v1 suffix.
+  if (SWITCH_API === "anthropic-messages") return "https://inference.local";
+  return "https://inference.local/v1";
+}
+
+export function expectedApiMode(): string | undefined { // api_mode value expected for SWITCH_API
+  if (SWITCH_API === "anthropic-messages") return "anthropic_messages";
+  if (SWITCH_API === "openai-responses") return "codex_responses";
+  // Any other API has no mapping here.
+  return undefined;
+}
+
+export async function apiKeyShape(sandbox: SandboxClient): Promise<ShellProbeResult> { // exit 0 iff config.yaml has an indented api_key starting with sk-
+  return await sandbox.execShell(
+    SANDBOX_NAME,
+    trustedSandboxShellScript(
+      "python3 - <<'PY'\nimport re\ntext=open('/sandbox/.hermes/config.yaml', encoding='utf-8').read()\nmatch=re.search(r'^\\s+api_key:\\s*[\\\"\\']?(sk-[^\\\"\\'\\s]+)', text, re.M)\nraise SystemExit(0 if match else 1)\nPY",
+    ),
+    { artifactName: "hermes-config-api-key-shape", env: env(), timeoutMs: 30_000 },
+  );
+}
+
+export async function hashCheck( // verifies the checksum list `file` inside the sandbox
+  sandbox: SandboxClient,
+  file: string, // interpolated unquoted into the shell command, so it must be a trusted, space-free path
+  artifact: string,
+): Promise<ShellProbeResult> {
+  return await sandbox.execShell(
+    SANDBOX_NAME,
+    trustedSandboxShellScript(`sha256sum -c ${file} --status && echo OK`), // prints OK only when verification succeeds
+    { artifactName: `hermes-${artifact}-hash-check`, env: env(), timeoutMs: 30_000 },
+  );
+}
+
+export async function strictHashPerms(sandbox: SandboxClient): Promise<ShellProbeResult> { // owner uid and mode of the strict hash file
+  return await sandbox.execShell(
+    SANDBOX_NAME,
+    trustedSandboxShellScript("stat -c '%u %a' /etc/nemoclaw/hermes.config-hash"), // prints "<uid> <octal mode>"
+    { artifactName: "hermes-strict-hash-perms", env: env(), timeoutMs: 30_000 },
+  );
+}
+
+export function maybeAssertEnvHashStable(
+  before: ShellProbeResult,
+  after: ShellProbeResult,
+  assertStable: (a: string, b: string) => void,
+): void {
+  const digestOf = (probe: ShellProbeResult): string => probe.stdout.split(/\s+/u)[0] ?? "";
+  // Only the first whitespace-separated field is compared; no baseline digest means no check.
+  if (digestOf(before)) assertStable(digestOf(after), digestOf(before));
+}
+
+/**
+ * Loads the host-side NemoClaw state files from ~/.nemoclaw as parsed JSON: sandboxes.json
+ * (registry) and onboard-session.json (session). Throws if either is missing or malformed.
+ */
+export function registryState(): { registry: Record<string, any>; session: Record<string, any> } {
+  const dir = path.join(os.homedir(), ".nemoclaw");
+  const readJson = (name: string) => JSON.parse(fs.readFileSync(path.join(dir, name), "utf8"));
+  // The registry file is read before the session file.
+  return { registry: readJson("sandboxes.json"), session: readJson("onboard-session.json") };
+}
+
+function quotePayload(payload: string): string { // makes `payload` safe inside a single-quoted shell word
+  return payload.replace(/'/gu, `'\\''`); // each ' becomes '\'' (close quote, escaped quote, reopen)
+}
+
+export function inferenceLocalCommand(payload: string): string { // curl command that POSTs `payload` to inference.local
+  return SWITCH_API === "anthropic-messages" // Anthropic shape: /v1/messages plus the anthropic-version header
+    ? `curl -sS --max-time 90 https://inference.local/v1/messages -H 'Content-Type: application/json' -H 'anthropic-version: 2023-06-01' -d '${quotePayload(payload)}'`
+    : `curl -sS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${quotePayload(payload)}'`;
+}
+
+export function hermesApiCommand(payload: string): string { // sources .env if present, then POSTs `payload` to localhost:8642
+  return `set -a; [ ! -f /sandbox/.hermes/.env ] || . /sandbox/.hermes/.env; set +a; curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer \${API_SERVER_KEY:-}" -d '${quotePayload(payload)}'`; // \${...} is left for the sandbox shell to expand
+}
diff --git a/test/e2e-scenario/live/hermes-inference-switch.test.ts b/test/e2e-scenario/live/hermes-inference-switch.test.ts
new file mode 100644
index 0000000000..b57ca4652d
--- /dev/null
+++ b/test/e2e-scenario/live/hermes-inference-switch.test.ts
@@ -0,0 +1,176 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/** Live Vitest replacement for test/e2e/test-hermes-inference-switch.sh. */
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import { resultText } from "../fixtures/clients/index.ts";
+import { trustedSandboxShellScript } from "../fixtures/clients/sandbox.ts";
+import { expect, test } from "../fixtures/e2e-test.ts";
+import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
+import {
+  apiKeyShape,
+  CLI,
+  chatContent,
+  cleanupHermesSwitch,
+  ensureCompatibleAnthropicSwitchProvider,
+  env,
+  envHash,
+  expectedApiMode,
+  expectedBaseUrl,
+  hashCheck,
+  hermesApiCommand,
+  hermesGatewayPid,
+  inferenceLocalCommand,
+  installHermes,
+  maybeAssertEnvHashStable,
+  maybeAssertPidStable,
+  parseHermesModelBlock,
+  registryState,
+  SANDBOX_NAME,
+  SWITCH_API,
+  SWITCH_MODEL,
+  SWITCH_PROVIDER,
+  strictHashPerms,
+} from "./hermes-inference-switch-helpers.ts";
+
+const TIMEOUT_MS = 45 * 60_000; // 45 minutes for the whole scenario
+
+test.skipIf(!shouldRunLiveE2EScenarios())(
+  "Hermes inference set updates route/config and preserves live runtime",
+  { timeout: TIMEOUT_MS },
+  async ({ artifacts, cleanup, host, sandbox, secrets }) => {
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    await artifacts.writeJson("scenario.json", {
+      id: "hermes-inference-switch",
+      legacySource: "test/e2e/test-hermes-inference-switch.sh",
+      boundary: "install.sh + Hermes sandbox + inference set + in-sandbox health/chat probes",
+      sandboxName: SANDBOX_NAME,
+      switchProvider: SWITCH_PROVIDER,
+      switchModel: SWITCH_MODEL,
+      switchApi: SWITCH_API,
+    });
+
+    cleanup.add("destroy Hermes inference switch sandbox", () =>
+      cleanupHermesSwitch(host, sandbox),
+    );
+    await cleanupHermesSwitch(host, sandbox); // also run the teardown up front, before installing
+
+    const docker = await host.command("docker", ["info"], {
+      artifactName: "docker-info",
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 30_000,
+    });
+    expect(docker.exitCode, resultText(docker)).toBe(0);
+
+    const install = await installHermes(host, apiKey);
+    expect(install.exitCode, resultText(install)).toBe(0);
+    await ensureCompatibleAnthropicSwitchProvider(host, cleanup);
+
+    const pidBefore = await hermesGatewayPid(sandbox, "pid-before"); // baselines captured before the switch
+    const envHashBefore = await envHash(sandbox, "env-hash-before");
+
+    const switched = await host.command(
+      "node",
+      [CLI, "inference", "set", "--provider", SWITCH_PROVIDER, "--model", SWITCH_MODEL],
+      {
+        artifactName: "hermes-inference-set",
+        env: env(apiKey),
+        redactionValues: [apiKey],
+        timeoutMs: 180_000,
+      },
+    );
+    expect(switched.exitCode, resultText(switched)).toBe(0);
+
+    const pidAfter = await hermesGatewayPid(sandbox, "pid-after");
+    maybeAssertPidStable(pidBefore, pidAfter, (actual, expected) => expect(actual).toBe(expected));
+
+    const health = await sandbox.exec(
+      SANDBOX_NAME,
+      ["curl", "-sf", "--max-time", "10", "http://localhost:8642/health"],
+      { artifactName: "hermes-health-after-switch", env: env(), timeoutMs: 30_000 },
+    );
+    expect(health.exitCode, resultText(health)).toBe(0);
+    expect(resultText(health)).toMatch(/ok/i);
+
+    const route = await sandbox.openshell(["inference", "get", "-g", "nemoclaw"], {
+      artifactName: "openshell-inference-route",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+    expect(route.exitCode, resultText(route)).toBe(0);
+    expect(resultText(route)).toContain(SWITCH_PROVIDER);
+    expect(resultText(route)).toContain(SWITCH_MODEL);
+
+    const config = await sandbox.exec(SANDBOX_NAME, ["cat", "/sandbox/.hermes/config.yaml"], {
+      artifactName: "hermes-config-yaml",
+      env: env(),
+      redactionValues: [apiKey],
+      timeoutMs: 30_000,
+    });
+    expect(config.exitCode, resultText(config)).toBe(0);
+    const model = parseHermesModelBlock(config.stdout);
+    expect(model.default).toBe(SWITCH_MODEL);
+    expect(model.provider).toBe("custom");
+    expect(model.base_url).toBe(expectedBaseUrl());
+    expect(model.api_mode).toBe(expectedApiMode()); // undefined (key absent) when SWITCH_API has no mapping
+    expect((await apiKeyShape(sandbox)).exitCode).toBe(0);
+    expect(config.stdout).not.toMatch(/^models:\s*$/mu); // no bare top-level `models:` line
+
+    const strictHash = await hashCheck(sandbox, "/etc/nemoclaw/hermes.config-hash", "strict");
+    expect(strictHash.exitCode, resultText(strictHash)).toBe(0);
+    expect(strictHash.stdout).toContain("OK");
+    const compatHash = await hashCheck(sandbox, "/sandbox/.hermes/.config-hash", "compat");
+    expect(compatHash.exitCode, resultText(compatHash)).toBe(0);
+    expect(compatHash.stdout).toContain("OK");
+    const strictPerms = await strictHashPerms(sandbox);
+    expect(strictPerms.stdout.trim()).toMatch(/^0\s+[0-7]+$/u); // "<uid> <octal mode>" with uid 0
+    expect(Number.parseInt(strictPerms.stdout.trim().split(/\s+/u)[1], 8) & 0o222).toBe(0); // no write bit for anyone
+
+    maybeAssertEnvHashStable(
+      envHashBefore,
+      await envHash(sandbox, "env-hash-after"),
+      (actual, expected) => expect(actual).toBe(expected),
+    );
+
+    const state = registryState(); // host-side files under ~/.nemoclaw
+    expect(state.registry.sandboxes?.[SANDBOX_NAME]?.agent).toBe("hermes");
+    expect(state.registry.sandboxes?.[SANDBOX_NAME]?.provider).toBe(SWITCH_PROVIDER);
+    expect(state.registry.sandboxes?.[SANDBOX_NAME]?.model).toBe(SWITCH_MODEL);
+    expect(state.session.sandboxName).toBe(SANDBOX_NAME);
+    expect(state.session.agent).toBe("hermes");
+    expect(state.session.provider).toBe(SWITCH_PROVIDER);
+    expect(state.session.model).toBe(SWITCH_MODEL);
+
+    const inferenceLocalPayload = JSON.stringify({ // the same payload is reused for both chat probes below
+      model: SWITCH_MODEL,
+      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+      max_tokens: 100,
+    });
+    const inferenceLocal = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(inferenceLocalCommand(inferenceLocalPayload)),
+      {
+        artifactName: "hermes-inference-local-chat-after-switch",
+        env: env(),
+        redactionValues: [apiKey],
+        timeoutMs: 120_000,
+      },
+    );
+    expect(inferenceLocal.exitCode, resultText(inferenceLocal)).toBe(0);
+    expect(chatContent(inferenceLocal.stdout)).toMatch(/PONG/i);
+
+    const chat = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(hermesApiCommand(inferenceLocalPayload)),
+      {
+        artifactName: "hermes-api-chat-after-switch",
+        env: env(),
+        redactionValues: [apiKey],
+        timeoutMs: 150_000,
+      },
+    );
+    expect(chat.exitCode, resultText(chat)).toBe(0);
+    expect(chatContent(chat.stdout)).toMatch(/PONG/i);
+  },
+);