From bea95ca855affe7ba616e2a049e976500fddd533 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 19 Jun 2026 12:40:07 -0700
Subject: [PATCH 01/10] test(e2e): migrate Hermes inference switch to vitest

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .github/workflows/e2e-vitest-scenarios.yaml   |  50 ++++
 .../live/hermes-inference-switch.test.ts      | 260 ++++++++++++++++++
 2 files changed, 310 insertions(+)
 create mode 100644 test/e2e-scenario/live/hermes-inference-switch.test.ts

diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index d270a3b670..7229eabc88 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -742,6 +742,55 @@ jobs:
           if-no-files-found: ignore
           retention-days: 14
 
+  hermes-inference-switch-vitest:
+    needs: generate-matrix
+    if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',hermes-inference-switch-vitest,') || contains(format(',{0},', inputs.scenarios), ',hermes-inference-switch,') }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 55
+    env:
+      FREE_STANDING_VITEST_JOB: "1"
+      FREE_STANDING_SCENARIO_ID: "hermes-inference-switch"
+      E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/hermes-inference-switch
+      NEMOCLAW_CLI_BIN: ${{ github.workspace }}/bin/nemoclaw.js
+      NEMOCLAW_RUN_E2E_SCENARIOS: "1"
+      NEMOCLAW_NON_INTERACTIVE: "1"
+      NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
+      NEMOCLAW_AGENT: "hermes"
+      NEMOCLAW_SANDBOX_NAME: "e2e-hermes-inference-switch"
+      OPENSHELL_GATEWAY: "nemoclaw"
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+        with:
+          persist-credentials: false
+      - name: Set up Node
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
+        with:
+          node-version: 22
+          cache: npm
+      - name: Install root dependencies
+        run: npm ci --ignore-scripts
+      - name: Build CLI
+        run: npm run build:cli
+      - name: Install OpenShell CLI
+        run: bash scripts/install-openshell.sh
+      - name: Run Hermes inference switch live Vitest test
+        env:
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+        run: |
+          set -euo pipefail
+          export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"
+          export OPENSHELL_BIN="$(command -v openshell || true)"
+          npx vitest run --project e2e-scenarios-live             test/e2e-scenario/live/hermes-inference-switch.test.ts             --silent=false --reporter=default
+      - name: Upload Hermes inference switch artifacts
+        if: always()
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: e2e-vitest-scenarios-hermes-inference-switch
+          path: e2e-artifacts/vitest/hermes-inference-switch/
+          include-hidden-files: false
+          if-no-files-found: ignore
+          retention-days: 14
+
   issue-4434-tui-unreachable-inference-vitest:
     needs: generate-matrix
     if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',issue-4434-tui-unreachable-inference-vitest,') || contains(format(',{0},', inputs.scenarios), ',issue-4434-tui-unreachable-inference,') }}
@@ -3586,6 +3635,7 @@ jobs:
         openclaw-skill-cli-vitest,
         inference-routing-vitest,
         cloud-inference-vitest,
+        hermes-inference-switch-vitest,
         credential-sanitization-vitest,
         credential-migration-vitest,
         sessions-agents-cli-vitest,
diff --git a/test/e2e-scenario/live/hermes-inference-switch.test.ts b/test/e2e-scenario/live/hermes-inference-switch.test.ts
new file mode 100644
index 0000000000..bac2c427bc
--- /dev/null
+++ b/test/e2e-scenario/live/hermes-inference-switch.test.ts
@@ -0,0 +1,260 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/** Live Vitest replacement for test/e2e/test-hermes-inference-switch.sh. */
+
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import { resultText } from "../fixtures/clients/index.ts";
+import { trustedSandboxShellScript, validateSandboxName } from "../fixtures/clients/sandbox.ts";
+import { expect, test } from "../fixtures/e2e-test.ts";
+import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
+import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
+import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
+
+const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js");
+const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-hermes-inference-switch";
+validateSandboxName(SANDBOX_NAME);
+const SWITCH_PROVIDER = process.env.NEMOCLAW_SWITCH_PROVIDER ?? "nvidia-prod";
+const SWITCH_MODEL = process.env.NEMOCLAW_SWITCH_MODEL ?? "z-ai/glm-5.1";
+const SWITCH_API = process.env.NEMOCLAW_SWITCH_INFERENCE_API ?? "openai-completions";
+const INSTALL_ATTEMPTS = process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true" ? 3 : 1;
+const TIMEOUT_MS = 45 * 60_000;
+
+function env(apiKey?: string): NodeJS.ProcessEnv {
+  const out: NodeJS.ProcessEnv = {
+    ...buildAvailabilityProbeEnv(),
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
+    NEMOCLAW_AGENT: "hermes",
+    NEMOCLAW_NON_INTERACTIVE: "1",
+    NEMOCLAW_RECREATE_SANDBOX: "1",
+    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
+    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
+  };
+  if (apiKey) {
+    out.NVIDIA_INFERENCE_API_KEY = apiKey;
+    out.NVIDIA_API_KEY = apiKey;
+  }
+  return out;
+}
+
+async function bestEffort(run: () => Promise<unknown>): Promise<void> {
+  try {
+    await run();
+  } catch {}
+}
+
+function parseHermesModelBlock(text: string): Record<string, string> {
+  const model: Record<string, string> = {};
+  let inModel = false;
+  for (const line of text.split(/\r?\n/u)) {
+    if (/^model:\s*$/u.test(line)) {
+      inModel = true;
+      continue;
+    }
+    if (inModel && /^[A-Za-z0-9_-]+:/u.test(line)) break;
+    if (!inModel) continue;
+    const match = line.match(/^\s+([A-Za-z0-9_-]+):\s*(.*?)\s*$/u);
+    if (!match) continue;
+    const value = match[2].replace(/^['"]|['"]$/gu, "");
+    model[match[1]] = value;
+  }
+  return model;
+}
+
+function chatContent(raw: string): string {
+  const parsed = JSON.parse(raw) as { choices?: Array<{ message?: Record<string, unknown> }> };
+  const message = parsed.choices?.[0]?.message ?? {};
+  for (const key of ["content", "reasoning_content", "reasoning"]) {
+    const value = message[key];
+    if (typeof value === "string" && value.trim()) return value.trim();
+  }
+  return "";
+}
+
+test.skipIf(!shouldRunLiveE2EScenarios())(
+  "Hermes inference set updates route/config and preserves live runtime",
+  { timeout: TIMEOUT_MS },
+  async ({ artifacts, cleanup, host, sandbox, secrets }) => {
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    await artifacts.writeJson("scenario.json", {
+      id: "hermes-inference-switch",
+      legacySource: "test/e2e/test-hermes-inference-switch.sh",
+      boundary: "install.sh + Hermes sandbox + inference set + in-sandbox health/chat probes",
+      sandboxName: SANDBOX_NAME,
+      switchProvider: SWITCH_PROVIDER,
+      switchModel: SWITCH_MODEL,
+      switchApi: SWITCH_API,
+    });
+
+    cleanup.add("destroy Hermes inference switch sandbox", async () => {
+      await bestEffort(() =>
+        host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
+          artifactName: "cleanup-nemoclaw-destroy",
+          env: env(),
+          timeoutMs: 120_000,
+        }),
+      );
+      await bestEffort(() =>
+        sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
+          artifactName: "cleanup-openshell-delete",
+          env: env(),
+          timeoutMs: 60_000,
+        }),
+      );
+    });
+
+    await bestEffort(() =>
+      host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
+        artifactName: "pre-cleanup-destroy",
+        env: env(),
+        timeoutMs: 120_000,
+      }),
+    );
+    await bestEffort(() =>
+      sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
+        artifactName: "pre-cleanup-delete",
+        env: env(),
+        timeoutMs: 60_000,
+      }),
+    );
+
+    const docker = await host.command("docker", ["info"], {
+      artifactName: "docker-info",
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 30_000,
+    });
+    expect(docker.exitCode, resultText(docker)).toBe(0);
+
+    let install: ShellProbeResult | undefined;
+    for (let attempt = 1; attempt <= INSTALL_ATTEMPTS; attempt += 1) {
+      install = await host.command(
+        "bash",
+        ["install.sh", "--non-interactive", "--yes-i-accept-third-party-software"],
+        {
+          artifactName: attempt === 1 ? "install-hermes" : `install-hermes-attempt-${attempt}`,
+          cwd: REPO_ROOT,
+          env: env(apiKey),
+          redactionValues: [apiKey],
+          timeoutMs: 25 * 60_000,
+        },
+      );
+      if (install.exitCode === 0) break;
+      if (isTransientProviderValidationFailure(install) && attempt < INSTALL_ATTEMPTS) {
+        await new Promise((resolve) => setTimeout(resolve, 10_000 * attempt));
+        continue;
+      }
+      break;
+    }
+    expect(install, "install command must run").toBeDefined();
+    expect(install?.exitCode, resultText(install as ShellProbeResult)).toBe(0);
+
+    const pidBefore = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
+      ),
+      { artifactName: "pid-before", env: env(), timeoutMs: 30_000 },
+    );
+    const envHashBefore = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
+      artifactName: "env-hash-before",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+
+    const switched = await host.command(
+      "node",
+      [CLI, "inference", "set", "--provider", SWITCH_PROVIDER, "--model", SWITCH_MODEL],
+      {
+        artifactName: "hermes-inference-set",
+        env: env(apiKey),
+        redactionValues: [apiKey],
+        timeoutMs: 180_000,
+      },
+    );
+    expect(switched.exitCode, resultText(switched)).toBe(0);
+
+    const pidAfter = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
+      ),
+      { artifactName: "pid-after", env: env(), timeoutMs: 30_000 },
+    );
+    if (pidBefore.stdout.trim() && pidAfter.stdout.trim())
+      expect(pidAfter.stdout.trim()).toBe(pidBefore.stdout.trim());
+
+    const health = await sandbox.exec(
+      SANDBOX_NAME,
+      ["curl", "-sf", "--max-time", "10", "http://localhost:8642/health"],
+      { artifactName: "hermes-health-after-switch", env: env(), timeoutMs: 30_000 },
+    );
+    expect(health.exitCode, resultText(health)).toBe(0);
+    expect(resultText(health)).toMatch(/ok/i);
+
+    const route = await sandbox.openshell(["inference", "get", "-g", "nemoclaw"], {
+      artifactName: "openshell-inference-route",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+    expect(route.exitCode, resultText(route)).toBe(0);
+    expect(resultText(route)).toContain(SWITCH_PROVIDER);
+    expect(resultText(route)).toContain(SWITCH_MODEL);
+
+    const config = await sandbox.exec(SANDBOX_NAME, ["cat", "/sandbox/.hermes/config.yaml"], {
+      artifactName: "hermes-config-yaml",
+      env: env(),
+      redactionValues: [apiKey],
+      timeoutMs: 30_000,
+    });
+    expect(config.exitCode, resultText(config)).toBe(0);
+    const model = parseHermesModelBlock(config.stdout);
+    expect(model.default).toBe(SWITCH_MODEL);
+    expect(model.provider).toBe("custom");
+    expect(model.base_url).toBe(
+      SWITCH_API === "anthropic-messages"
+        ? "https://inference.local"
+        : "https://inference.local/v1",
+    );
+    expect(config.stdout).not.toMatch(/^models:\s*$/mu);
+
+    const envHashAfter = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
+      artifactName: "env-hash-after",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+    if (envHashBefore.stdout.trim())
+      expect(envHashAfter.stdout.split(/\s+/u)[0]).toBe(envHashBefore.stdout.split(/\s+/u)[0]);
+
+    const registry = JSON.parse(
+      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "sandboxes.json"), "utf8"),
+    );
+    expect(registry.sandboxes?.[SANDBOX_NAME]?.agent).toBe("hermes");
+    expect(registry.sandboxes?.[SANDBOX_NAME]?.provider).toBe(SWITCH_PROVIDER);
+    expect(registry.sandboxes?.[SANDBOX_NAME]?.model).toBe(SWITCH_MODEL);
+
+    const payload = JSON.stringify({
+      model: SWITCH_MODEL,
+      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+      max_tokens: 100,
+    });
+    const chat = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        `set -a; [ ! -f /sandbox/.hermes/.env ] || . /sandbox/.hermes/.env; set +a; curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer \${API_SERVER_KEY:-}" -d '${payload.replace(/'/gu, `'\\''`)}'`,
+      ),
+      {
+        artifactName: "hermes-api-chat-after-switch",
+        env: env(),
+        redactionValues: [apiKey],
+        timeoutMs: 150_000,
+      },
+    );
+    expect(chat.exitCode, resultText(chat)).toBe(0);
+    expect(chatContent(chat.stdout)).toMatch(/PONG/i);
+  },
+);

From d965382907fa18614ddc1e309ae8e2f7893928c4 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 19 Jun 2026 12:53:00 -0700
Subject: [PATCH 02/10] test(e2e): restore Hermes switch assertions

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../live/hermes-inference-switch.test.ts      | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/test/e2e-scenario/live/hermes-inference-switch.test.ts b/test/e2e-scenario/live/hermes-inference-switch.test.ts
index bac2c427bc..fb1f4af752 100644
--- a/test/e2e-scenario/live/hermes-inference-switch.test.ts
+++ b/test/e2e-scenario/live/hermes-inference-switch.test.ts
@@ -220,8 +220,32 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
         ? "https://inference.local"
         : "https://inference.local/v1",
     );
+    if (SWITCH_API === "anthropic-messages") expect(model.api_mode).toBe("anthropic_messages");
+    else if (SWITCH_API === "openai-responses") expect(model.api_mode).toBe("codex_responses");
+    else expect(model.api_mode).toBeUndefined();
+    expect(model.api_key).toMatch(/^sk-/u);
     expect(config.stdout).not.toMatch(/^models:\s*$/mu);
 
+    for (const [file, artifact] of [
+      ["/etc/nemoclaw/hermes.config-hash", "strict"],
+      ["/sandbox/.hermes/.config-hash", "compat"],
+    ] as const) {
+      const hash = await sandbox.execShell(
+        SANDBOX_NAME,
+        trustedSandboxShellScript(`sha256sum -c ${file} --status && echo OK`),
+        { artifactName: `hermes-${artifact}-hash-check`, env: env(), timeoutMs: 30_000 },
+      );
+      expect(hash.exitCode, resultText(hash)).toBe(0);
+      expect(hash.stdout).toContain("OK");
+    }
+    const strictHashPerms = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript("stat -c '%u %a' /etc/nemoclaw/hermes.config-hash"),
+      { artifactName: "hermes-strict-hash-perms", env: env(), timeoutMs: 30_000 },
+    );
+    expect(strictHashPerms.stdout.trim()).toMatch(/^0\s+[0-7]+$/u);
+    expect(Number.parseInt(strictHashPerms.stdout.trim().split(/\s+/u)[1], 8) & 0o222).toBe(0);
+
     const envHashAfter = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
       artifactName: "env-hash-after",
       env: env(),
@@ -236,6 +260,33 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     expect(registry.sandboxes?.[SANDBOX_NAME]?.agent).toBe("hermes");
     expect(registry.sandboxes?.[SANDBOX_NAME]?.provider).toBe(SWITCH_PROVIDER);
     expect(registry.sandboxes?.[SANDBOX_NAME]?.model).toBe(SWITCH_MODEL);
+    const session = JSON.parse(
+      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "onboard-session.json"), "utf8"),
+    );
+    expect(session.sandboxName).toBe(SANDBOX_NAME);
+    expect(session.agent).toBe("hermes");
+    expect(session.provider).toBe(SWITCH_PROVIDER);
+    expect(session.model).toBe(SWITCH_MODEL);
+
+    const inferenceLocalPayload = JSON.stringify({
+      model: SWITCH_MODEL,
+      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+      max_tokens: 100,
+    });
+    const inferenceLocal = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        `curl -sS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`,
+      ),
+      {
+        artifactName: "hermes-inference-local-chat-after-switch",
+        env: env(),
+        redactionValues: [apiKey],
+        timeoutMs: 120_000,
+      },
+    );
+    expect(inferenceLocal.exitCode, resultText(inferenceLocal)).toBe(0);
+    expect(chatContent(inferenceLocal.stdout)).toMatch(/PONG/i);
 
     const payload = JSON.stringify({
       model: SWITCH_MODEL,

From ed011cdd0b7a993b789bd443e3149a54ed019609 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 19 Jun 2026 14:07:35 -0700
Subject: [PATCH 03/10] test(e2e): cover Hermes switch API modes

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../live/hermes-inference-switch.test.ts        | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/test/e2e-scenario/live/hermes-inference-switch.test.ts b/test/e2e-scenario/live/hermes-inference-switch.test.ts
index fb1f4af752..d7dcca4941 100644
--- a/test/e2e-scenario/live/hermes-inference-switch.test.ts
+++ b/test/e2e-scenario/live/hermes-inference-switch.test.ts
@@ -67,7 +67,12 @@ function parseHermesModelBlock(text: string): Record<string, string> {
 }
 
 function chatContent(raw: string): string {
-  const parsed = JSON.parse(raw) as { choices?: Array<{ message?: Record<string, unknown> }> };
+  const parsed = JSON.parse(raw) as {
+    choices?: Array<{ message?: Record<string, unknown> }>;
+    content?: Array<{ text?: unknown }>;
+  };
+  const anthropicText = parsed.content?.find((part) => typeof part.text === "string")?.text;
+  if (typeof anthropicText === "string" && anthropicText.trim()) return anthropicText.trim();
   const message = parsed.choices?.[0]?.message ?? {};
   for (const key of ["content", "reasoning_content", "reasoning"]) {
     const value = message[key];
@@ -223,7 +228,7 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     if (SWITCH_API === "anthropic-messages") expect(model.api_mode).toBe("anthropic_messages");
     else if (SWITCH_API === "openai-responses") expect(model.api_mode).toBe("codex_responses");
     else expect(model.api_mode).toBeUndefined();
-    expect(model.api_key).toMatch(/^sk-/u);
+    expect(typeof model.api_key === "string" && /^sk-/u.test(model.api_key)).toBe(true);
     expect(config.stdout).not.toMatch(/^models:\s*$/mu);
 
     for (const [file, artifact] of [
@@ -273,11 +278,13 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
       messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
       max_tokens: 100,
     });
+    const inferenceLocalCommand =
+      SWITCH_API === "anthropic-messages"
+        ? `curl -sS --max-time 90 https://inference.local/v1/messages -H 'Content-Type: application/json' -H 'anthropic-version: 2023-06-01' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`
+        : `curl -sS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`;
     const inferenceLocal = await sandbox.execShell(
       SANDBOX_NAME,
-      trustedSandboxShellScript(
-        `curl -sS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`,
-      ),
+      trustedSandboxShellScript(inferenceLocalCommand),
       {
         artifactName: "hermes-inference-local-chat-after-switch",
         env: env(),

From 3f43d8d686a15875f548269867f5f21defd88947 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 19 Jun 2026 14:16:05 -0700
Subject: [PATCH 04/10] test(e2e): avoid Hermes api key redaction false
 negative

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 test/e2e-scenario/live/hermes-inference-switch.test.ts | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/test/e2e-scenario/live/hermes-inference-switch.test.ts b/test/e2e-scenario/live/hermes-inference-switch.test.ts
index d7dcca4941..ef16ab7a8b 100644
--- a/test/e2e-scenario/live/hermes-inference-switch.test.ts
+++ b/test/e2e-scenario/live/hermes-inference-switch.test.ts
@@ -228,7 +228,14 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     if (SWITCH_API === "anthropic-messages") expect(model.api_mode).toBe("anthropic_messages");
     else if (SWITCH_API === "openai-responses") expect(model.api_mode).toBe("codex_responses");
     else expect(model.api_mode).toBeUndefined();
-    expect(typeof model.api_key === "string" && /^sk-/u.test(model.api_key)).toBe(true);
+    const apiKeyShape = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        "python3 - <<'PY'\nimport re\ntext=open('/sandbox/.hermes/config.yaml', encoding='utf-8').read()\nmatch=re.search(r'^\\s+api_key:\\s*[\\\"\\']?(sk-[^\\\"\\'\\s]+)', text, re.M)\nraise SystemExit(0 if match else 1)\nPY",
+      ),
+      { artifactName: "hermes-config-api-key-shape", env: env(), timeoutMs: 30_000 },
+    );
+    expect(apiKeyShape.exitCode, resultText(apiKeyShape)).toBe(0);
     expect(config.stdout).not.toMatch(/^models:\s*$/mu);
 
     for (const [file, artifact] of [

From fcedfdbc92f52221d2a487c839d27de41240b7bc Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 19 Jun 2026 14:49:50 -0700
Subject: [PATCH 05/10] test(e2e): relax workflow inventory timeout

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../support-tests/e2e-scenarios-workflow.test.ts             | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
index 2e3e22e0a7..c812fb20ca 100644
--- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
@@ -8,14 +8,13 @@ import path from "node:path";
 
 import { describe, expect, it } from "vitest";
 import YAML from "yaml";
-
-import { testTimeoutOptions } from "../../helpers/timeouts";
 import {
   evaluateE2eVitestWorkflowDispatchSelectors,
   readFreeStandingJobsInventory,
   validateE2eVitestScenariosWorkflowBoundary,
   validateFreeStandingWorkflowInventory,
 } from "../../../tools/e2e-scenarios/workflow-boundary.mts";
+import { testTimeoutOptions } from "../../helpers/timeouts";
 
 function readWorkflow(): Record<string, unknown> {
   return YAML.parse(
@@ -565,7 +564,7 @@ describe("e2e-vitest-scenarios workflow boundary", () => {
     },
   );
 
-  it("derives the free-standing inventory from workflow job metadata", () => {
+  it("derives the free-standing inventory from workflow job metadata", { timeout: 60_000 }, () => {
     const inventory = readFreeStandingJobsInventory();
     expect(validateFreeStandingWorkflowInventory()).toEqual([]);
     expect(inventory.allowedJobs).toContain("openshell-version-pin-vitest");

From 969a85319e77a15d6e7e60d6d484690557596f61 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Sat, 20 Jun 2026 08:53:40 -0700
Subject: [PATCH 06/10] test(e2e): move scenario logic out of test wrapper

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../live/hermes-inference-switch.scenario.ts  | 325 ++++++++++++++++++
 .../live/hermes-inference-switch.test.ts      | 323 +----------------
 2 files changed, 326 insertions(+), 322 deletions(-)
 create mode 100644 test/e2e-scenario/live/hermes-inference-switch.scenario.ts

diff --git a/test/e2e-scenario/live/hermes-inference-switch.scenario.ts b/test/e2e-scenario/live/hermes-inference-switch.scenario.ts
new file mode 100644
index 0000000000..ef16ab7a8b
--- /dev/null
+++ b/test/e2e-scenario/live/hermes-inference-switch.scenario.ts
@@ -0,0 +1,325 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/** Live Vitest replacement for test/e2e/test-hermes-inference-switch.sh. */
+
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import { resultText } from "../fixtures/clients/index.ts";
+import { trustedSandboxShellScript, validateSandboxName } from "../fixtures/clients/sandbox.ts";
+import { expect, test } from "../fixtures/e2e-test.ts";
+import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
+import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
+import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
+
+const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js");
+const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-hermes-inference-switch";
+validateSandboxName(SANDBOX_NAME);
+const SWITCH_PROVIDER = process.env.NEMOCLAW_SWITCH_PROVIDER ?? "nvidia-prod";
+const SWITCH_MODEL = process.env.NEMOCLAW_SWITCH_MODEL ?? "z-ai/glm-5.1";
+const SWITCH_API = process.env.NEMOCLAW_SWITCH_INFERENCE_API ?? "openai-completions";
+const INSTALL_ATTEMPTS = process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true" ? 3 : 1;
+const TIMEOUT_MS = 45 * 60_000;
+
+function env(apiKey?: string): NodeJS.ProcessEnv {
+  // Availability-probe baseline overlaid with the fixed Hermes scenario flags. Both NVIDIA
+  // key variables are appended last, and only when a non-empty `apiKey` is supplied.
+  const credentials = apiKey ? { NVIDIA_INFERENCE_API_KEY: apiKey, NVIDIA_API_KEY: apiKey } : {};
+  return {
+    ...buildAvailabilityProbeEnv(),
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
+    NEMOCLAW_AGENT: "hermes",
+    NEMOCLAW_NON_INTERACTIVE: "1",
+    NEMOCLAW_RECREATE_SANDBOX: "1",
+    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
+    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
+    ...credentials,
+  };
+}
+
+async function bestEffort(run: () => Promise<unknown>): Promise<void> {
+  try {
+    await run();
+  } catch {} // empty handler: any throw or rejection from `run` is discarded, so this never rejects
+}
+
+function parseHermesModelBlock(text: string): Record<string, string> {
+  // Collects the indented `key: value` lines that follow a bare top-level `model:` line,
+  // stopping at the next top-level key; one leading/trailing quote is stripped per value.
+  const fields: Record<string, string> = {};
+  const lines = text.split(/\r?\n/u);
+  const header = lines.findIndex((line) => /^model:\s*$/u.test(line));
+  if (header === -1) return fields;
+  for (const line of lines.slice(header + 1)) {
+    // A repeated bare `model:` line is skipped rather than treated as the next top-level key.
+    if (/^model:\s*$/u.test(line)) continue;
+    if (/^[A-Za-z0-9_-]+:/u.test(line)) break;
+    const entry = /^\s+([A-Za-z0-9_-]+):\s*(.*?)\s*$/u.exec(line);
+    if (entry === null) continue;
+    fields[entry[1]] = entry[2].replace(/^['"]|['"]$/gu, "");
+  }
+  return fields;
+}
+
+function chatContent(raw: string): string {
+  // Returns the first non-blank reply text, trimmed: Anthropic-style top-level `content[].text`
+  // first, then `choices[0].message` fields; "" when none. Throws if `raw` is not valid JSON.
+  const parsed = JSON.parse(raw) as {
+    choices?: Array<{ message?: Record<string, unknown> }>;
+    content?: Array<{ text?: unknown }>;
+  };
+  const message = parsed.choices?.[0]?.message ?? {};
+  // Blank candidates are skipped so an empty leading text part cannot mask a later non-empty one.
+  const candidates: unknown[] = (parsed.content ?? []).map((part) => part.text);
+  for (const key of ["content", "reasoning_content", "reasoning"]) candidates.push(message[key]);
+  const reply = candidates.find((value) => typeof value === "string" && value.trim() !== "");
+  return typeof reply === "string" ? reply.trim() : "";
+}
+
+test.skipIf(!shouldRunLiveE2EScenarios())(
+  "Hermes inference set updates route/config and preserves live runtime",
+  { timeout: TIMEOUT_MS },
+  async ({ artifacts, cleanup, host, sandbox, secrets }) => {
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    await artifacts.writeJson("scenario.json", {
+      id: "hermes-inference-switch",
+      legacySource: "test/e2e/test-hermes-inference-switch.sh",
+      boundary: "install.sh + Hermes sandbox + inference set + in-sandbox health/chat probes",
+      sandboxName: SANDBOX_NAME,
+      switchProvider: SWITCH_PROVIDER,
+      switchModel: SWITCH_MODEL,
+      switchApi: SWITCH_API,
+    });
+
+    cleanup.add("destroy Hermes inference switch sandbox", async () => {
+      await bestEffort(() =>
+        host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
+          artifactName: "cleanup-nemoclaw-destroy",
+          env: env(),
+          timeoutMs: 120_000,
+        }),
+      );
+      await bestEffort(() =>
+        sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
+          artifactName: "cleanup-openshell-delete",
+          env: env(),
+          timeoutMs: 60_000,
+        }),
+      );
+    });
+
+    await bestEffort(() =>
+      host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
+        artifactName: "pre-cleanup-destroy",
+        env: env(),
+        timeoutMs: 120_000,
+      }),
+    );
+    await bestEffort(() =>
+      sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
+        artifactName: "pre-cleanup-delete",
+        env: env(),
+        timeoutMs: 60_000,
+      }),
+    );
+
+    const docker = await host.command("docker", ["info"], {
+      artifactName: "docker-info",
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 30_000,
+    });
+    expect(docker.exitCode, resultText(docker)).toBe(0);
+
+    let install: ShellProbeResult | undefined;
+    for (let attempt = 1; attempt <= INSTALL_ATTEMPTS; attempt += 1) {
+      install = await host.command(
+        "bash",
+        ["install.sh", "--non-interactive", "--yes-i-accept-third-party-software"],
+        {
+          artifactName: attempt === 1 ? "install-hermes" : `install-hermes-attempt-${attempt}`,
+          cwd: REPO_ROOT,
+          env: env(apiKey),
+          redactionValues: [apiKey],
+          timeoutMs: 25 * 60_000,
+        },
+      );
+      if (install.exitCode === 0) break;
+      if (isTransientProviderValidationFailure(install) && attempt < INSTALL_ATTEMPTS) {
+        await new Promise((resolve) => setTimeout(resolve, 10_000 * attempt));
+        continue;
+      }
+      break;
+    }
+    expect(install, "install command must run").toBeDefined();
+    expect(install?.exitCode, resultText(install as ShellProbeResult)).toBe(0);
+
+    const pidBefore = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
+      ),
+      { artifactName: "pid-before", env: env(), timeoutMs: 30_000 },
+    );
+    const envHashBefore = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
+      artifactName: "env-hash-before",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+
+    const switched = await host.command(
+      "node",
+      [CLI, "inference", "set", "--provider", SWITCH_PROVIDER, "--model", SWITCH_MODEL],
+      {
+        artifactName: "hermes-inference-set",
+        env: env(apiKey),
+        redactionValues: [apiKey],
+        timeoutMs: 180_000,
+      },
+    );
+    expect(switched.exitCode, resultText(switched)).toBe(0);
+
+    const pidAfter = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
+      ),
+      { artifactName: "pid-after", env: env(), timeoutMs: 30_000 },
+    );
+    if (pidBefore.stdout.trim() && pidAfter.stdout.trim())
+      expect(pidAfter.stdout.trim()).toBe(pidBefore.stdout.trim());
+
+    const health = await sandbox.exec(
+      SANDBOX_NAME,
+      ["curl", "-sf", "--max-time", "10", "http://localhost:8642/health"],
+      { artifactName: "hermes-health-after-switch", env: env(), timeoutMs: 30_000 },
+    );
+    expect(health.exitCode, resultText(health)).toBe(0);
+    expect(resultText(health)).toMatch(/ok/i);
+
+    const route = await sandbox.openshell(["inference", "get", "-g", "nemoclaw"], {
+      artifactName: "openshell-inference-route",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+    expect(route.exitCode, resultText(route)).toBe(0);
+    expect(resultText(route)).toContain(SWITCH_PROVIDER);
+    expect(resultText(route)).toContain(SWITCH_MODEL);
+
+    const config = await sandbox.exec(SANDBOX_NAME, ["cat", "/sandbox/.hermes/config.yaml"], {
+      artifactName: "hermes-config-yaml",
+      env: env(),
+      redactionValues: [apiKey],
+      timeoutMs: 30_000,
+    });
+    expect(config.exitCode, resultText(config)).toBe(0);
+    const model = parseHermesModelBlock(config.stdout);
+    expect(model.default).toBe(SWITCH_MODEL);
+    expect(model.provider).toBe("custom");
+    expect(model.base_url).toBe(
+      SWITCH_API === "anthropic-messages"
+        ? "https://inference.local"
+        : "https://inference.local/v1",
+    );
+    if (SWITCH_API === "anthropic-messages") expect(model.api_mode).toBe("anthropic_messages");
+    else if (SWITCH_API === "openai-responses") expect(model.api_mode).toBe("codex_responses");
+    else expect(model.api_mode).toBeUndefined();
+    const apiKeyShape = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        "python3 - <<'PY'\nimport re\ntext=open('/sandbox/.hermes/config.yaml', encoding='utf-8').read()\nmatch=re.search(r'^\\s+api_key:\\s*[\\\"\\']?(sk-[^\\\"\\'\\s]+)', text, re.M)\nraise SystemExit(0 if match else 1)\nPY",
+      ),
+      { artifactName: "hermes-config-api-key-shape", env: env(), timeoutMs: 30_000 },
+    );
+    expect(apiKeyShape.exitCode, resultText(apiKeyShape)).toBe(0);
+    expect(config.stdout).not.toMatch(/^models:\s*$/mu);
+
+    for (const [file, artifact] of [
+      ["/etc/nemoclaw/hermes.config-hash", "strict"],
+      ["/sandbox/.hermes/.config-hash", "compat"],
+    ] as const) {
+      const hash = await sandbox.execShell(
+        SANDBOX_NAME,
+        trustedSandboxShellScript(`sha256sum -c ${file} --status && echo OK`),
+        { artifactName: `hermes-${artifact}-hash-check`, env: env(), timeoutMs: 30_000 },
+      );
+      expect(hash.exitCode, resultText(hash)).toBe(0);
+      expect(hash.stdout).toContain("OK");
+    }
+    const strictHashPerms = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript("stat -c '%u %a' /etc/nemoclaw/hermes.config-hash"),
+      { artifactName: "hermes-strict-hash-perms", env: env(), timeoutMs: 30_000 },
+    );
+    expect(strictHashPerms.stdout.trim()).toMatch(/^0\s+[0-7]+$/u);
+    expect(Number.parseInt(strictHashPerms.stdout.trim().split(/\s+/u)[1], 8) & 0o222).toBe(0);
+
+    const envHashAfter = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
+      artifactName: "env-hash-after",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+    if (envHashBefore.stdout.trim())
+      expect(envHashAfter.stdout.split(/\s+/u)[0]).toBe(envHashBefore.stdout.split(/\s+/u)[0]);
+
+    const registry = JSON.parse(
+      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "sandboxes.json"), "utf8"),
+    );
+    expect(registry.sandboxes?.[SANDBOX_NAME]?.agent).toBe("hermes");
+    expect(registry.sandboxes?.[SANDBOX_NAME]?.provider).toBe(SWITCH_PROVIDER);
+    expect(registry.sandboxes?.[SANDBOX_NAME]?.model).toBe(SWITCH_MODEL);
+    const session = JSON.parse(
+      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "onboard-session.json"), "utf8"),
+    );
+    expect(session.sandboxName).toBe(SANDBOX_NAME);
+    expect(session.agent).toBe("hermes");
+    expect(session.provider).toBe(SWITCH_PROVIDER);
+    expect(session.model).toBe(SWITCH_MODEL);
+
+    const inferenceLocalPayload = JSON.stringify({
+      model: SWITCH_MODEL,
+      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+      max_tokens: 100,
+    });
+    const inferenceLocalCommand =
+      SWITCH_API === "anthropic-messages"
+        ? `curl -sS --max-time 90 https://inference.local/v1/messages -H 'Content-Type: application/json' -H 'anthropic-version: 2023-06-01' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`
+        : `curl -sS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`;
+    const inferenceLocal = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(inferenceLocalCommand),
+      {
+        artifactName: "hermes-inference-local-chat-after-switch",
+        env: env(),
+        redactionValues: [apiKey],
+        timeoutMs: 120_000,
+      },
+    );
+    expect(inferenceLocal.exitCode, resultText(inferenceLocal)).toBe(0);
+    expect(chatContent(inferenceLocal.stdout)).toMatch(/PONG/i);
+
+    const payload = JSON.stringify({
+      model: SWITCH_MODEL,
+      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+      max_tokens: 100,
+    });
+    const chat = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        `set -a; [ ! -f /sandbox/.hermes/.env ] || . /sandbox/.hermes/.env; set +a; curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer \${API_SERVER_KEY:-}" -d '${payload.replace(/'/gu, `'\\''`)}'`,
+      ),
+      {
+        artifactName: "hermes-api-chat-after-switch",
+        env: env(),
+        redactionValues: [apiKey],
+        timeoutMs: 150_000,
+      },
+    );
+    expect(chat.exitCode, resultText(chat)).toBe(0);
+    expect(chatContent(chat.stdout)).toMatch(/PONG/i);
+  },
+);
diff --git a/test/e2e-scenario/live/hermes-inference-switch.test.ts b/test/e2e-scenario/live/hermes-inference-switch.test.ts
index ef16ab7a8b..6a0b726c04 100644
--- a/test/e2e-scenario/live/hermes-inference-switch.test.ts
+++ b/test/e2e-scenario/live/hermes-inference-switch.test.ts
@@ -1,325 +1,4 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-/** Live Vitest replacement for test/e2e/test-hermes-inference-switch.sh. */
-
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-
-import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
-import { resultText } from "../fixtures/clients/index.ts";
-import { trustedSandboxShellScript, validateSandboxName } from "../fixtures/clients/sandbox.ts";
-import { expect, test } from "../fixtures/e2e-test.ts";
-import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
-import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
-import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
-
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js");
-const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-hermes-inference-switch";
-validateSandboxName(SANDBOX_NAME);
-const SWITCH_PROVIDER = process.env.NEMOCLAW_SWITCH_PROVIDER ?? "nvidia-prod";
-const SWITCH_MODEL = process.env.NEMOCLAW_SWITCH_MODEL ?? "z-ai/glm-5.1";
-const SWITCH_API = process.env.NEMOCLAW_SWITCH_INFERENCE_API ?? "openai-completions";
-const INSTALL_ATTEMPTS = process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true" ? 3 : 1;
-const TIMEOUT_MS = 45 * 60_000;
-
-function env(apiKey?: string): NodeJS.ProcessEnv {
-  const out: NodeJS.ProcessEnv = {
-    ...buildAvailabilityProbeEnv(),
-    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
-    NEMOCLAW_AGENT: "hermes",
-    NEMOCLAW_NON_INTERACTIVE: "1",
-    NEMOCLAW_RECREATE_SANDBOX: "1",
-    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
-    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
-  };
-  if (apiKey) {
-    out.NVIDIA_INFERENCE_API_KEY = apiKey;
-    out.NVIDIA_API_KEY = apiKey;
-  }
-  return out;
-}
-
-async function bestEffort(run: () => Promise<unknown>): Promise<void> {
-  try {
-    await run();
-  } catch {}
-}
-
-function parseHermesModelBlock(text: string): Record<string, string> {
-  const model: Record<string, string> = {};
-  let inModel = false;
-  for (const line of text.split(/\r?\n/u)) {
-    if (/^model:\s*$/u.test(line)) {
-      inModel = true;
-      continue;
-    }
-    if (inModel && /^[A-Za-z0-9_-]+:/u.test(line)) break;
-    if (!inModel) continue;
-    const match = line.match(/^\s+([A-Za-z0-9_-]+):\s*(.*?)\s*$/u);
-    if (!match) continue;
-    const value = match[2].replace(/^['"]|['"]$/gu, "");
-    model[match[1]] = value;
-  }
-  return model;
-}
-
-function chatContent(raw: string): string {
-  const parsed = JSON.parse(raw) as {
-    choices?: Array<{ message?: Record<string, unknown> }>;
-    content?: Array<{ text?: unknown }>;
-  };
-  const anthropicText = parsed.content?.find((part) => typeof part.text === "string")?.text;
-  if (typeof anthropicText === "string" && anthropicText.trim()) return anthropicText.trim();
-  const message = parsed.choices?.[0]?.message ?? {};
-  for (const key of ["content", "reasoning_content", "reasoning"]) {
-    const value = message[key];
-    if (typeof value === "string" && value.trim()) return value.trim();
-  }
-  return "";
-}
-
-test.skipIf(!shouldRunLiveE2EScenarios())(
-  "Hermes inference set updates route/config and preserves live runtime",
-  { timeout: TIMEOUT_MS },
-  async ({ artifacts, cleanup, host, sandbox, secrets }) => {
-    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
-    await artifacts.writeJson("scenario.json", {
-      id: "hermes-inference-switch",
-      legacySource: "test/e2e/test-hermes-inference-switch.sh",
-      boundary: "install.sh + Hermes sandbox + inference set + in-sandbox health/chat probes",
-      sandboxName: SANDBOX_NAME,
-      switchProvider: SWITCH_PROVIDER,
-      switchModel: SWITCH_MODEL,
-      switchApi: SWITCH_API,
-    });
-
-    cleanup.add("destroy Hermes inference switch sandbox", async () => {
-      await bestEffort(() =>
-        host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
-          artifactName: "cleanup-nemoclaw-destroy",
-          env: env(),
-          timeoutMs: 120_000,
-        }),
-      );
-      await bestEffort(() =>
-        sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
-          artifactName: "cleanup-openshell-delete",
-          env: env(),
-          timeoutMs: 60_000,
-        }),
-      );
-    });
-
-    await bestEffort(() =>
-      host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
-        artifactName: "pre-cleanup-destroy",
-        env: env(),
-        timeoutMs: 120_000,
-      }),
-    );
-    await bestEffort(() =>
-      sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
-        artifactName: "pre-cleanup-delete",
-        env: env(),
-        timeoutMs: 60_000,
-      }),
-    );
-
-    const docker = await host.command("docker", ["info"], {
-      artifactName: "docker-info",
-      env: buildAvailabilityProbeEnv(),
-      timeoutMs: 30_000,
-    });
-    expect(docker.exitCode, resultText(docker)).toBe(0);
-
-    let install: ShellProbeResult | undefined;
-    for (let attempt = 1; attempt <= INSTALL_ATTEMPTS; attempt += 1) {
-      install = await host.command(
-        "bash",
-        ["install.sh", "--non-interactive", "--yes-i-accept-third-party-software"],
-        {
-          artifactName: attempt === 1 ? "install-hermes" : `install-hermes-attempt-${attempt}`,
-          cwd: REPO_ROOT,
-          env: env(apiKey),
-          redactionValues: [apiKey],
-          timeoutMs: 25 * 60_000,
-        },
-      );
-      if (install.exitCode === 0) break;
-      if (isTransientProviderValidationFailure(install) && attempt < INSTALL_ATTEMPTS) {
-        await new Promise((resolve) => setTimeout(resolve, 10_000 * attempt));
-        continue;
-      }
-      break;
-    }
-    expect(install, "install command must run").toBeDefined();
-    expect(install?.exitCode, resultText(install as ShellProbeResult)).toBe(0);
-
-    const pidBefore = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(
-        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
-      ),
-      { artifactName: "pid-before", env: env(), timeoutMs: 30_000 },
-    );
-    const envHashBefore = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
-      artifactName: "env-hash-before",
-      env: env(),
-      timeoutMs: 30_000,
-    });
-
-    const switched = await host.command(
-      "node",
-      [CLI, "inference", "set", "--provider", SWITCH_PROVIDER, "--model", SWITCH_MODEL],
-      {
-        artifactName: "hermes-inference-set",
-        env: env(apiKey),
-        redactionValues: [apiKey],
-        timeoutMs: 180_000,
-      },
-    );
-    expect(switched.exitCode, resultText(switched)).toBe(0);
-
-    const pidAfter = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(
-        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
-      ),
-      { artifactName: "pid-after", env: env(), timeoutMs: 30_000 },
-    );
-    if (pidBefore.stdout.trim() && pidAfter.stdout.trim())
-      expect(pidAfter.stdout.trim()).toBe(pidBefore.stdout.trim());
-
-    const health = await sandbox.exec(
-      SANDBOX_NAME,
-      ["curl", "-sf", "--max-time", "10", "http://localhost:8642/health"],
-      { artifactName: "hermes-health-after-switch", env: env(), timeoutMs: 30_000 },
-    );
-    expect(health.exitCode, resultText(health)).toBe(0);
-    expect(resultText(health)).toMatch(/ok/i);
-
-    const route = await sandbox.openshell(["inference", "get", "-g", "nemoclaw"], {
-      artifactName: "openshell-inference-route",
-      env: env(),
-      timeoutMs: 30_000,
-    });
-    expect(route.exitCode, resultText(route)).toBe(0);
-    expect(resultText(route)).toContain(SWITCH_PROVIDER);
-    expect(resultText(route)).toContain(SWITCH_MODEL);
-
-    const config = await sandbox.exec(SANDBOX_NAME, ["cat", "/sandbox/.hermes/config.yaml"], {
-      artifactName: "hermes-config-yaml",
-      env: env(),
-      redactionValues: [apiKey],
-      timeoutMs: 30_000,
-    });
-    expect(config.exitCode, resultText(config)).toBe(0);
-    const model = parseHermesModelBlock(config.stdout);
-    expect(model.default).toBe(SWITCH_MODEL);
-    expect(model.provider).toBe("custom");
-    expect(model.base_url).toBe(
-      SWITCH_API === "anthropic-messages"
-        ? "https://inference.local"
-        : "https://inference.local/v1",
-    );
-    if (SWITCH_API === "anthropic-messages") expect(model.api_mode).toBe("anthropic_messages");
-    else if (SWITCH_API === "openai-responses") expect(model.api_mode).toBe("codex_responses");
-    else expect(model.api_mode).toBeUndefined();
-    const apiKeyShape = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(
-        "python3 - <<'PY'\nimport re\ntext=open('/sandbox/.hermes/config.yaml', encoding='utf-8').read()\nmatch=re.search(r'^\\s+api_key:\\s*[\\\"\\']?(sk-[^\\\"\\'\\s]+)', text, re.M)\nraise SystemExit(0 if match else 1)\nPY",
-      ),
-      { artifactName: "hermes-config-api-key-shape", env: env(), timeoutMs: 30_000 },
-    );
-    expect(apiKeyShape.exitCode, resultText(apiKeyShape)).toBe(0);
-    expect(config.stdout).not.toMatch(/^models:\s*$/mu);
-
-    for (const [file, artifact] of [
-      ["/etc/nemoclaw/hermes.config-hash", "strict"],
-      ["/sandbox/.hermes/.config-hash", "compat"],
-    ] as const) {
-      const hash = await sandbox.execShell(
-        SANDBOX_NAME,
-        trustedSandboxShellScript(`sha256sum -c ${file} --status && echo OK`),
-        { artifactName: `hermes-${artifact}-hash-check`, env: env(), timeoutMs: 30_000 },
-      );
-      expect(hash.exitCode, resultText(hash)).toBe(0);
-      expect(hash.stdout).toContain("OK");
-    }
-    const strictHashPerms = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript("stat -c '%u %a' /etc/nemoclaw/hermes.config-hash"),
-      { artifactName: "hermes-strict-hash-perms", env: env(), timeoutMs: 30_000 },
-    );
-    expect(strictHashPerms.stdout.trim()).toMatch(/^0\s+[0-7]+$/u);
-    expect(Number.parseInt(strictHashPerms.stdout.trim().split(/\s+/u)[1], 8) & 0o222).toBe(0);
-
-    const envHashAfter = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
-      artifactName: "env-hash-after",
-      env: env(),
-      timeoutMs: 30_000,
-    });
-    if (envHashBefore.stdout.trim())
-      expect(envHashAfter.stdout.split(/\s+/u)[0]).toBe(envHashBefore.stdout.split(/\s+/u)[0]);
-
-    const registry = JSON.parse(
-      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "sandboxes.json"), "utf8"),
-    );
-    expect(registry.sandboxes?.[SANDBOX_NAME]?.agent).toBe("hermes");
-    expect(registry.sandboxes?.[SANDBOX_NAME]?.provider).toBe(SWITCH_PROVIDER);
-    expect(registry.sandboxes?.[SANDBOX_NAME]?.model).toBe(SWITCH_MODEL);
-    const session = JSON.parse(
-      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "onboard-session.json"), "utf8"),
-    );
-    expect(session.sandboxName).toBe(SANDBOX_NAME);
-    expect(session.agent).toBe("hermes");
-    expect(session.provider).toBe(SWITCH_PROVIDER);
-    expect(session.model).toBe(SWITCH_MODEL);
-
-    const inferenceLocalPayload = JSON.stringify({
-      model: SWITCH_MODEL,
-      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
-      max_tokens: 100,
-    });
-    const inferenceLocalCommand =
-      SWITCH_API === "anthropic-messages"
-        ? `curl -sS --max-time 90 https://inference.local/v1/messages -H 'Content-Type: application/json' -H 'anthropic-version: 2023-06-01' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`
-        : `curl -sS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`;
-    const inferenceLocal = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(inferenceLocalCommand),
-      {
-        artifactName: "hermes-inference-local-chat-after-switch",
-        env: env(),
-        redactionValues: [apiKey],
-        timeoutMs: 120_000,
-      },
-    );
-    expect(inferenceLocal.exitCode, resultText(inferenceLocal)).toBe(0);
-    expect(chatContent(inferenceLocal.stdout)).toMatch(/PONG/i);
-
-    const payload = JSON.stringify({
-      model: SWITCH_MODEL,
-      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
-      max_tokens: 100,
-    });
-    const chat = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(
-        `set -a; [ ! -f /sandbox/.hermes/.env ] || . /sandbox/.hermes/.env; set +a; curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer \${API_SERVER_KEY:-}" -d '${payload.replace(/'/gu, `'\\''`)}'`,
-      ),
-      {
-        artifactName: "hermes-api-chat-after-switch",
-        env: env(),
-        redactionValues: [apiKey],
-        timeoutMs: 150_000,
-      },
-    );
-    expect(chat.exitCode, resultText(chat)).toBe(0);
-    expect(chatContent(chat.stdout)).toMatch(/PONG/i);
-  },
-);
+import "./hermes-inference-switch.scenario.ts";

From c57bee353a5cb03fee8c340de15018d29c9d39aa Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Sat, 20 Jun 2026 09:28:05 -0700
Subject: [PATCH 07/10] test(e2e): restore scenario logic to test file

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../live/hermes-inference-switch.scenario.ts  | 325 ------------------
 .../live/hermes-inference-switch.test.ts      | 323 ++++++++++++++++-
 2 files changed, 322 insertions(+), 326 deletions(-)
 delete mode 100644 test/e2e-scenario/live/hermes-inference-switch.scenario.ts

diff --git a/test/e2e-scenario/live/hermes-inference-switch.scenario.ts b/test/e2e-scenario/live/hermes-inference-switch.scenario.ts
deleted file mode 100644
index ef16ab7a8b..0000000000
--- a/test/e2e-scenario/live/hermes-inference-switch.scenario.ts
+++ /dev/null
@@ -1,325 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/** Live Vitest replacement for test/e2e/test-hermes-inference-switch.sh. */
-
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-
-import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
-import { resultText } from "../fixtures/clients/index.ts";
-import { trustedSandboxShellScript, validateSandboxName } from "../fixtures/clients/sandbox.ts";
-import { expect, test } from "../fixtures/e2e-test.ts";
-import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
-import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
-import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
-
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js");
-const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-hermes-inference-switch";
-validateSandboxName(SANDBOX_NAME);
-const SWITCH_PROVIDER = process.env.NEMOCLAW_SWITCH_PROVIDER ?? "nvidia-prod";
-const SWITCH_MODEL = process.env.NEMOCLAW_SWITCH_MODEL ?? "z-ai/glm-5.1";
-const SWITCH_API = process.env.NEMOCLAW_SWITCH_INFERENCE_API ?? "openai-completions";
-const INSTALL_ATTEMPTS = process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true" ? 3 : 1;
-const TIMEOUT_MS = 45 * 60_000;
-
-function env(apiKey?: string): NodeJS.ProcessEnv {
-  const out: NodeJS.ProcessEnv = {
-    ...buildAvailabilityProbeEnv(),
-    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
-    NEMOCLAW_AGENT: "hermes",
-    NEMOCLAW_NON_INTERACTIVE: "1",
-    NEMOCLAW_RECREATE_SANDBOX: "1",
-    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
-    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
-  };
-  if (apiKey) {
-    out.NVIDIA_INFERENCE_API_KEY = apiKey;
-    out.NVIDIA_API_KEY = apiKey;
-  }
-  return out;
-}
-
-async function bestEffort(run: () => Promise<unknown>): Promise<void> {
-  try {
-    await run();
-  } catch {}
-}
-
-function parseHermesModelBlock(text: string): Record<string, string> {
-  const model: Record<string, string> = {};
-  let inModel = false;
-  for (const line of text.split(/\r?\n/u)) {
-    if (/^model:\s*$/u.test(line)) {
-      inModel = true;
-      continue;
-    }
-    if (inModel && /^[A-Za-z0-9_-]+:/u.test(line)) break;
-    if (!inModel) continue;
-    const match = line.match(/^\s+([A-Za-z0-9_-]+):\s*(.*?)\s*$/u);
-    if (!match) continue;
-    const value = match[2].replace(/^['"]|['"]$/gu, "");
-    model[match[1]] = value;
-  }
-  return model;
-}
-
-function chatContent(raw: string): string {
-  const parsed = JSON.parse(raw) as {
-    choices?: Array<{ message?: Record<string, unknown> }>;
-    content?: Array<{ text?: unknown }>;
-  };
-  const anthropicText = parsed.content?.find((part) => typeof part.text === "string")?.text;
-  if (typeof anthropicText === "string" && anthropicText.trim()) return anthropicText.trim();
-  const message = parsed.choices?.[0]?.message ?? {};
-  for (const key of ["content", "reasoning_content", "reasoning"]) {
-    const value = message[key];
-    if (typeof value === "string" && value.trim()) return value.trim();
-  }
-  return "";
-}
-
-test.skipIf(!shouldRunLiveE2EScenarios())(
-  "Hermes inference set updates route/config and preserves live runtime",
-  { timeout: TIMEOUT_MS },
-  async ({ artifacts, cleanup, host, sandbox, secrets }) => {
-    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
-    await artifacts.writeJson("scenario.json", {
-      id: "hermes-inference-switch",
-      legacySource: "test/e2e/test-hermes-inference-switch.sh",
-      boundary: "install.sh + Hermes sandbox + inference set + in-sandbox health/chat probes",
-      sandboxName: SANDBOX_NAME,
-      switchProvider: SWITCH_PROVIDER,
-      switchModel: SWITCH_MODEL,
-      switchApi: SWITCH_API,
-    });
-
-    cleanup.add("destroy Hermes inference switch sandbox", async () => {
-      await bestEffort(() =>
-        host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
-          artifactName: "cleanup-nemoclaw-destroy",
-          env: env(),
-          timeoutMs: 120_000,
-        }),
-      );
-      await bestEffort(() =>
-        sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
-          artifactName: "cleanup-openshell-delete",
-          env: env(),
-          timeoutMs: 60_000,
-        }),
-      );
-    });
-
-    await bestEffort(() =>
-      host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
-        artifactName: "pre-cleanup-destroy",
-        env: env(),
-        timeoutMs: 120_000,
-      }),
-    );
-    await bestEffort(() =>
-      sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
-        artifactName: "pre-cleanup-delete",
-        env: env(),
-        timeoutMs: 60_000,
-      }),
-    );
-
-    const docker = await host.command("docker", ["info"], {
-      artifactName: "docker-info",
-      env: buildAvailabilityProbeEnv(),
-      timeoutMs: 30_000,
-    });
-    expect(docker.exitCode, resultText(docker)).toBe(0);
-
-    let install: ShellProbeResult | undefined;
-    for (let attempt = 1; attempt <= INSTALL_ATTEMPTS; attempt += 1) {
-      install = await host.command(
-        "bash",
-        ["install.sh", "--non-interactive", "--yes-i-accept-third-party-software"],
-        {
-          artifactName: attempt === 1 ? "install-hermes" : `install-hermes-attempt-${attempt}`,
-          cwd: REPO_ROOT,
-          env: env(apiKey),
-          redactionValues: [apiKey],
-          timeoutMs: 25 * 60_000,
-        },
-      );
-      if (install.exitCode === 0) break;
-      if (isTransientProviderValidationFailure(install) && attempt < INSTALL_ATTEMPTS) {
-        await new Promise((resolve) => setTimeout(resolve, 10_000 * attempt));
-        continue;
-      }
-      break;
-    }
-    expect(install, "install command must run").toBeDefined();
-    expect(install?.exitCode, resultText(install as ShellProbeResult)).toBe(0);
-
-    const pidBefore = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(
-        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
-      ),
-      { artifactName: "pid-before", env: env(), timeoutMs: 30_000 },
-    );
-    const envHashBefore = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
-      artifactName: "env-hash-before",
-      env: env(),
-      timeoutMs: 30_000,
-    });
-
-    const switched = await host.command(
-      "node",
-      [CLI, "inference", "set", "--provider", SWITCH_PROVIDER, "--model", SWITCH_MODEL],
-      {
-        artifactName: "hermes-inference-set",
-        env: env(apiKey),
-        redactionValues: [apiKey],
-        timeoutMs: 180_000,
-      },
-    );
-    expect(switched.exitCode, resultText(switched)).toBe(0);
-
-    const pidAfter = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(
-        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
-      ),
-      { artifactName: "pid-after", env: env(), timeoutMs: 30_000 },
-    );
-    if (pidBefore.stdout.trim() && pidAfter.stdout.trim())
-      expect(pidAfter.stdout.trim()).toBe(pidBefore.stdout.trim());
-
-    const health = await sandbox.exec(
-      SANDBOX_NAME,
-      ["curl", "-sf", "--max-time", "10", "http://localhost:8642/health"],
-      { artifactName: "hermes-health-after-switch", env: env(), timeoutMs: 30_000 },
-    );
-    expect(health.exitCode, resultText(health)).toBe(0);
-    expect(resultText(health)).toMatch(/ok/i);
-
-    const route = await sandbox.openshell(["inference", "get", "-g", "nemoclaw"], {
-      artifactName: "openshell-inference-route",
-      env: env(),
-      timeoutMs: 30_000,
-    });
-    expect(route.exitCode, resultText(route)).toBe(0);
-    expect(resultText(route)).toContain(SWITCH_PROVIDER);
-    expect(resultText(route)).toContain(SWITCH_MODEL);
-
-    const config = await sandbox.exec(SANDBOX_NAME, ["cat", "/sandbox/.hermes/config.yaml"], {
-      artifactName: "hermes-config-yaml",
-      env: env(),
-      redactionValues: [apiKey],
-      timeoutMs: 30_000,
-    });
-    expect(config.exitCode, resultText(config)).toBe(0);
-    const model = parseHermesModelBlock(config.stdout);
-    expect(model.default).toBe(SWITCH_MODEL);
-    expect(model.provider).toBe("custom");
-    expect(model.base_url).toBe(
-      SWITCH_API === "anthropic-messages"
-        ? "https://inference.local"
-        : "https://inference.local/v1",
-    );
-    if (SWITCH_API === "anthropic-messages") expect(model.api_mode).toBe("anthropic_messages");
-    else if (SWITCH_API === "openai-responses") expect(model.api_mode).toBe("codex_responses");
-    else expect(model.api_mode).toBeUndefined();
-    const apiKeyShape = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(
-        "python3 - <<'PY'\nimport re\ntext=open('/sandbox/.hermes/config.yaml', encoding='utf-8').read()\nmatch=re.search(r'^\\s+api_key:\\s*[\\\"\\']?(sk-[^\\\"\\'\\s]+)', text, re.M)\nraise SystemExit(0 if match else 1)\nPY",
-      ),
-      { artifactName: "hermes-config-api-key-shape", env: env(), timeoutMs: 30_000 },
-    );
-    expect(apiKeyShape.exitCode, resultText(apiKeyShape)).toBe(0);
-    expect(config.stdout).not.toMatch(/^models:\s*$/mu);
-
-    for (const [file, artifact] of [
-      ["/etc/nemoclaw/hermes.config-hash", "strict"],
-      ["/sandbox/.hermes/.config-hash", "compat"],
-    ] as const) {
-      const hash = await sandbox.execShell(
-        SANDBOX_NAME,
-        trustedSandboxShellScript(`sha256sum -c ${file} --status && echo OK`),
-        { artifactName: `hermes-${artifact}-hash-check`, env: env(), timeoutMs: 30_000 },
-      );
-      expect(hash.exitCode, resultText(hash)).toBe(0);
-      expect(hash.stdout).toContain("OK");
-    }
-    const strictHashPerms = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript("stat -c '%u %a' /etc/nemoclaw/hermes.config-hash"),
-      { artifactName: "hermes-strict-hash-perms", env: env(), timeoutMs: 30_000 },
-    );
-    expect(strictHashPerms.stdout.trim()).toMatch(/^0\s+[0-7]+$/u);
-    expect(Number.parseInt(strictHashPerms.stdout.trim().split(/\s+/u)[1], 8) & 0o222).toBe(0);
-
-    const envHashAfter = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
-      artifactName: "env-hash-after",
-      env: env(),
-      timeoutMs: 30_000,
-    });
-    if (envHashBefore.stdout.trim())
-      expect(envHashAfter.stdout.split(/\s+/u)[0]).toBe(envHashBefore.stdout.split(/\s+/u)[0]);
-
-    const registry = JSON.parse(
-      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "sandboxes.json"), "utf8"),
-    );
-    expect(registry.sandboxes?.[SANDBOX_NAME]?.agent).toBe("hermes");
-    expect(registry.sandboxes?.[SANDBOX_NAME]?.provider).toBe(SWITCH_PROVIDER);
-    expect(registry.sandboxes?.[SANDBOX_NAME]?.model).toBe(SWITCH_MODEL);
-    const session = JSON.parse(
-      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "onboard-session.json"), "utf8"),
-    );
-    expect(session.sandboxName).toBe(SANDBOX_NAME);
-    expect(session.agent).toBe("hermes");
-    expect(session.provider).toBe(SWITCH_PROVIDER);
-    expect(session.model).toBe(SWITCH_MODEL);
-
-    const inferenceLocalPayload = JSON.stringify({
-      model: SWITCH_MODEL,
-      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
-      max_tokens: 100,
-    });
-    const inferenceLocalCommand =
-      SWITCH_API === "anthropic-messages"
-        ? `curl -sS --max-time 90 https://inference.local/v1/messages -H 'Content-Type: application/json' -H 'anthropic-version: 2023-06-01' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`
-        : `curl -sS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`;
-    const inferenceLocal = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(inferenceLocalCommand),
-      {
-        artifactName: "hermes-inference-local-chat-after-switch",
-        env: env(),
-        redactionValues: [apiKey],
-        timeoutMs: 120_000,
-      },
-    );
-    expect(inferenceLocal.exitCode, resultText(inferenceLocal)).toBe(0);
-    expect(chatContent(inferenceLocal.stdout)).toMatch(/PONG/i);
-
-    const payload = JSON.stringify({
-      model: SWITCH_MODEL,
-      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
-      max_tokens: 100,
-    });
-    const chat = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(
-        `set -a; [ ! -f /sandbox/.hermes/.env ] || . /sandbox/.hermes/.env; set +a; curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer \${API_SERVER_KEY:-}" -d '${payload.replace(/'/gu, `'\\''`)}'`,
-      ),
-      {
-        artifactName: "hermes-api-chat-after-switch",
-        env: env(),
-        redactionValues: [apiKey],
-        timeoutMs: 150_000,
-      },
-    );
-    expect(chat.exitCode, resultText(chat)).toBe(0);
-    expect(chatContent(chat.stdout)).toMatch(/PONG/i);
-  },
-);
diff --git a/test/e2e-scenario/live/hermes-inference-switch.test.ts b/test/e2e-scenario/live/hermes-inference-switch.test.ts
index 6a0b726c04..ef16ab7a8b 100644
--- a/test/e2e-scenario/live/hermes-inference-switch.test.ts
+++ b/test/e2e-scenario/live/hermes-inference-switch.test.ts
@@ -1,4 +1,325 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-import "./hermes-inference-switch.scenario.ts";
+/** Live Vitest replacement for test/e2e/test-hermes-inference-switch.sh. */
+
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import { resultText } from "../fixtures/clients/index.ts";
+import { trustedSandboxShellScript, validateSandboxName } from "../fixtures/clients/sandbox.ts";
+import { expect, test } from "../fixtures/e2e-test.ts";
+import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
+import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
+import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
+
+const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js");
+const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-hermes-inference-switch";
+validateSandboxName(SANDBOX_NAME);
+const SWITCH_PROVIDER = process.env.NEMOCLAW_SWITCH_PROVIDER ?? "nvidia-prod";
+const SWITCH_MODEL = process.env.NEMOCLAW_SWITCH_MODEL ?? "z-ai/glm-5.1";
+const SWITCH_API = process.env.NEMOCLAW_SWITCH_INFERENCE_API ?? "openai-completions";
+const INSTALL_ATTEMPTS = process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true" ? 3 : 1;
+const TIMEOUT_MS = 45 * 60_000;
+
+function env(apiKey?: string): NodeJS.ProcessEnv {
+  const out: NodeJS.ProcessEnv = {
+    ...buildAvailabilityProbeEnv(),
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
+    NEMOCLAW_AGENT: "hermes",
+    NEMOCLAW_NON_INTERACTIVE: "1",
+    NEMOCLAW_RECREATE_SANDBOX: "1",
+    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
+    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
+  };
+  if (apiKey) {
+    out.NVIDIA_INFERENCE_API_KEY = apiKey;
+    out.NVIDIA_API_KEY = apiKey;
+  }
+  return out;
+}
+
+async function bestEffort(run: () => Promise<unknown>): Promise<void> {
+  try {
+    await run();
+  } catch {}
+}
+
+function parseHermesModelBlock(text: string): Record<string, string> {
+  const model: Record<string, string> = {};
+  let inModel = false;
+  for (const line of text.split(/\r?\n/u)) {
+    if (/^model:\s*$/u.test(line)) {
+      inModel = true;
+      continue;
+    }
+    if (inModel && /^[A-Za-z0-9_-]+:/u.test(line)) break;
+    if (!inModel) continue;
+    const match = line.match(/^\s+([A-Za-z0-9_-]+):\s*(.*?)\s*$/u);
+    if (!match) continue;
+    const value = match[2].replace(/^['"]|['"]$/gu, "");
+    model[match[1]] = value;
+  }
+  return model;
+}
+
+function chatContent(raw: string): string {
+  const parsed = JSON.parse(raw) as {
+    choices?: Array<{ message?: Record<string, unknown> }>;
+    content?: Array<{ text?: unknown }>;
+  };
+  const anthropicText = parsed.content?.find((part) => typeof part.text === "string")?.text;
+  if (typeof anthropicText === "string" && anthropicText.trim()) return anthropicText.trim();
+  const message = parsed.choices?.[0]?.message ?? {};
+  for (const key of ["content", "reasoning_content", "reasoning"]) {
+    const value = message[key];
+    if (typeof value === "string" && value.trim()) return value.trim();
+  }
+  return "";
+}
+
+test.skipIf(!shouldRunLiveE2EScenarios())(
+  "Hermes inference set updates route/config and preserves live runtime",
+  { timeout: TIMEOUT_MS },
+  async ({ artifacts, cleanup, host, sandbox, secrets }) => {
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    await artifacts.writeJson("scenario.json", {
+      id: "hermes-inference-switch",
+      legacySource: "test/e2e/test-hermes-inference-switch.sh",
+      boundary: "install.sh + Hermes sandbox + inference set + in-sandbox health/chat probes",
+      sandboxName: SANDBOX_NAME,
+      switchProvider: SWITCH_PROVIDER,
+      switchModel: SWITCH_MODEL,
+      switchApi: SWITCH_API,
+    });
+
+    cleanup.add("destroy Hermes inference switch sandbox", async () => {
+      await bestEffort(() =>
+        host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
+          artifactName: "cleanup-nemoclaw-destroy",
+          env: env(),
+          timeoutMs: 120_000,
+        }),
+      );
+      await bestEffort(() =>
+        sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
+          artifactName: "cleanup-openshell-delete",
+          env: env(),
+          timeoutMs: 60_000,
+        }),
+      );
+    });
+
+    await bestEffort(() =>
+      host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
+        artifactName: "pre-cleanup-destroy",
+        env: env(),
+        timeoutMs: 120_000,
+      }),
+    );
+    await bestEffort(() =>
+      sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
+        artifactName: "pre-cleanup-delete",
+        env: env(),
+        timeoutMs: 60_000,
+      }),
+    );
+
+    const docker = await host.command("docker", ["info"], {
+      artifactName: "docker-info",
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 30_000,
+    });
+    expect(docker.exitCode, resultText(docker)).toBe(0);
+
+    let install: ShellProbeResult | undefined;
+    for (let attempt = 1; attempt <= INSTALL_ATTEMPTS; attempt += 1) {
+      install = await host.command(
+        "bash",
+        ["install.sh", "--non-interactive", "--yes-i-accept-third-party-software"],
+        {
+          artifactName: attempt === 1 ? "install-hermes" : `install-hermes-attempt-${attempt}`,
+          cwd: REPO_ROOT,
+          env: env(apiKey),
+          redactionValues: [apiKey],
+          timeoutMs: 25 * 60_000,
+        },
+      );
+      if (install.exitCode === 0) break;
+      if (isTransientProviderValidationFailure(install) && attempt < INSTALL_ATTEMPTS) {
+        await new Promise((resolve) => setTimeout(resolve, 10_000 * attempt));
+        continue;
+      }
+      break;
+    }
+    expect(install, "install command must run").toBeDefined();
+    expect(install?.exitCode, resultText(install as ShellProbeResult)).toBe(0);
+
+    const pidBefore = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
+      ),
+      { artifactName: "pid-before", env: env(), timeoutMs: 30_000 },
+    );
+    const envHashBefore = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
+      artifactName: "env-hash-before",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+
+    const switched = await host.command(
+      "node",
+      [CLI, "inference", "set", "--provider", SWITCH_PROVIDER, "--model", SWITCH_MODEL],
+      {
+        artifactName: "hermes-inference-set",
+        env: env(apiKey),
+        redactionValues: [apiKey],
+        timeoutMs: 180_000,
+      },
+    );
+    expect(switched.exitCode, resultText(switched)).toBe(0);
+
+    const pidAfter = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
+      ),
+      { artifactName: "pid-after", env: env(), timeoutMs: 30_000 },
+    );
+    if (pidBefore.stdout.trim() && pidAfter.stdout.trim())
+      expect(pidAfter.stdout.trim()).toBe(pidBefore.stdout.trim());
+
+    const health = await sandbox.exec(
+      SANDBOX_NAME,
+      ["curl", "-sf", "--max-time", "10", "http://localhost:8642/health"],
+      { artifactName: "hermes-health-after-switch", env: env(), timeoutMs: 30_000 },
+    );
+    expect(health.exitCode, resultText(health)).toBe(0);
+    expect(resultText(health)).toMatch(/ok/i);
+
+    const route = await sandbox.openshell(["inference", "get", "-g", "nemoclaw"], {
+      artifactName: "openshell-inference-route",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+    expect(route.exitCode, resultText(route)).toBe(0);
+    expect(resultText(route)).toContain(SWITCH_PROVIDER);
+    expect(resultText(route)).toContain(SWITCH_MODEL);
+
+    const config = await sandbox.exec(SANDBOX_NAME, ["cat", "/sandbox/.hermes/config.yaml"], {
+      artifactName: "hermes-config-yaml",
+      env: env(),
+      redactionValues: [apiKey],
+      timeoutMs: 30_000,
+    });
+    expect(config.exitCode, resultText(config)).toBe(0);
+    const model = parseHermesModelBlock(config.stdout);
+    expect(model.default).toBe(SWITCH_MODEL);
+    expect(model.provider).toBe("custom");
+    expect(model.base_url).toBe(
+      SWITCH_API === "anthropic-messages"
+        ? "https://inference.local"
+        : "https://inference.local/v1",
+    );
+    if (SWITCH_API === "anthropic-messages") expect(model.api_mode).toBe("anthropic_messages");
+    else if (SWITCH_API === "openai-responses") expect(model.api_mode).toBe("codex_responses");
+    else expect(model.api_mode).toBeUndefined();
+    const apiKeyShape = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        "python3 - <<'PY'\nimport re\ntext=open('/sandbox/.hermes/config.yaml', encoding='utf-8').read()\nmatch=re.search(r'^\\s+api_key:\\s*[\\\"\\']?(sk-[^\\\"\\'\\s]+)', text, re.M)\nraise SystemExit(0 if match else 1)\nPY",
+      ),
+      { artifactName: "hermes-config-api-key-shape", env: env(), timeoutMs: 30_000 },
+    );
+    expect(apiKeyShape.exitCode, resultText(apiKeyShape)).toBe(0);
+    expect(config.stdout).not.toMatch(/^models:\s*$/mu);
+
+    for (const [file, artifact] of [
+      ["/etc/nemoclaw/hermes.config-hash", "strict"],
+      ["/sandbox/.hermes/.config-hash", "compat"],
+    ] as const) {
+      const hash = await sandbox.execShell(
+        SANDBOX_NAME,
+        trustedSandboxShellScript(`sha256sum -c ${file} --status && echo OK`),
+        { artifactName: `hermes-${artifact}-hash-check`, env: env(), timeoutMs: 30_000 },
+      );
+      expect(hash.exitCode, resultText(hash)).toBe(0);
+      expect(hash.stdout).toContain("OK");
+    }
+    const strictHashPerms = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript("stat -c '%u %a' /etc/nemoclaw/hermes.config-hash"),
+      { artifactName: "hermes-strict-hash-perms", env: env(), timeoutMs: 30_000 },
+    );
+    expect(strictHashPerms.stdout.trim()).toMatch(/^0\s+[0-7]+$/u);
+    expect(Number.parseInt(strictHashPerms.stdout.trim().split(/\s+/u)[1], 8) & 0o222).toBe(0);
+
+    const envHashAfter = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
+      artifactName: "env-hash-after",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+    if (envHashBefore.stdout.trim())
+      expect(envHashAfter.stdout.split(/\s+/u)[0]).toBe(envHashBefore.stdout.split(/\s+/u)[0]);
+
+    const registry = JSON.parse(
+      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "sandboxes.json"), "utf8"),
+    );
+    expect(registry.sandboxes?.[SANDBOX_NAME]?.agent).toBe("hermes");
+    expect(registry.sandboxes?.[SANDBOX_NAME]?.provider).toBe(SWITCH_PROVIDER);
+    expect(registry.sandboxes?.[SANDBOX_NAME]?.model).toBe(SWITCH_MODEL);
+    const session = JSON.parse(
+      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "onboard-session.json"), "utf8"),
+    );
+    expect(session.sandboxName).toBe(SANDBOX_NAME);
+    expect(session.agent).toBe("hermes");
+    expect(session.provider).toBe(SWITCH_PROVIDER);
+    expect(session.model).toBe(SWITCH_MODEL);
+
+    const inferenceLocalPayload = JSON.stringify({
+      model: SWITCH_MODEL,
+      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+      max_tokens: 100,
+    });
+    const inferenceLocalCommand =
+      SWITCH_API === "anthropic-messages"
+        ? `curl -sS --max-time 90 https://inference.local/v1/messages -H 'Content-Type: application/json' -H 'anthropic-version: 2023-06-01' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`
+        : `curl -sS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`;
+    const inferenceLocal = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(inferenceLocalCommand),
+      {
+        artifactName: "hermes-inference-local-chat-after-switch",
+        env: env(),
+        redactionValues: [apiKey],
+        timeoutMs: 120_000,
+      },
+    );
+    expect(inferenceLocal.exitCode, resultText(inferenceLocal)).toBe(0);
+    expect(chatContent(inferenceLocal.stdout)).toMatch(/PONG/i);
+
+    const payload = JSON.stringify({
+      model: SWITCH_MODEL,
+      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+      max_tokens: 100,
+    });
+    const chat = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        `set -a; [ ! -f /sandbox/.hermes/.env ] || . /sandbox/.hermes/.env; set +a; curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer \${API_SERVER_KEY:-}" -d '${payload.replace(/'/gu, `'\\''`)}'`,
+      ),
+      {
+        artifactName: "hermes-api-chat-after-switch",
+        env: env(),
+        redactionValues: [apiKey],
+        timeoutMs: 150_000,
+      },
+    );
+    expect(chat.exitCode, resultText(chat)).toBe(0);
+    expect(chatContent(chat.stdout)).toMatch(/PONG/i);
+  },
+);

From 32ce1db6da139d18171067763578c67ca87a5277 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Sat, 20 Jun 2026 11:02:30 -0700
Subject: [PATCH 08/10] test(e2e): move Hermes switch branches to helpers

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../live/hermes-inference-switch-helpers.ts   | 236 +++++++++++++++
 .../live/hermes-inference-switch.test.ts      | 273 ++++--------------
 2 files changed, 297 insertions(+), 212 deletions(-)
 create mode 100644 test/e2e-scenario/live/hermes-inference-switch-helpers.ts

diff --git a/test/e2e-scenario/live/hermes-inference-switch-helpers.ts b/test/e2e-scenario/live/hermes-inference-switch-helpers.ts
new file mode 100644
index 0000000000..865b5f9124
--- /dev/null
+++ b/test/e2e-scenario/live/hermes-inference-switch-helpers.ts
@@ -0,0 +1,236 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import type { HostCliClient } from "../fixtures/clients/host.ts";
+import { resultText } from "../fixtures/clients/index.ts";
+import {
+  type SandboxClient,
+  trustedSandboxShellScript,
+  validateSandboxName,
+} from "../fixtures/clients/sandbox.ts";
+import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
+import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
+
+export const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+export const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js");
+export const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-hermes-inference-switch";
+validateSandboxName(SANDBOX_NAME);
+export const SWITCH_PROVIDER = process.env.NEMOCLAW_SWITCH_PROVIDER ?? "nvidia-prod";
+export const SWITCH_MODEL = process.env.NEMOCLAW_SWITCH_MODEL ?? "z-ai/glm-5.1";
+export const SWITCH_API = process.env.NEMOCLAW_SWITCH_INFERENCE_API ?? "openai-completions";
+const INSTALL_ATTEMPTS = process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true" ? 3 : 1;
+
+export function env(apiKey?: string): NodeJS.ProcessEnv {
+  const out: NodeJS.ProcessEnv = {
+    ...buildAvailabilityProbeEnv(),
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
+    NEMOCLAW_AGENT: "hermes",
+    NEMOCLAW_NON_INTERACTIVE: "1",
+    NEMOCLAW_RECREATE_SANDBOX: "1",
+    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
+    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
+  };
+  apiKey && Object.assign(out, { NVIDIA_INFERENCE_API_KEY: apiKey, NVIDIA_API_KEY: apiKey });
+  return out;
+}
+
+export async function bestEffort(run: () => Promise<unknown>): Promise<void> {
+  try {
+    await run();
+  } catch {}
+}
+
+export function parseHermesModelBlock(text: string): Record<string, string> {
+  const model: Record<string, string> = {};
+  let inModel = false;
+  for (const line of text.split(/\r?\n/u)) {
+    const entersModel = /^model:\s*$/u.test(line);
+    entersModel && (inModel = true);
+    if (entersModel) continue;
+    if (inModel && /^[A-Za-z0-9_-]+:/u.test(line)) break;
+    const match = inModel ? line.match(/^\s+([A-Za-z0-9_-]+):\s*(.*?)\s*$/u) : null;
+    match && (model[match[1]] = match[2].replace(/^['"]|['"]$/gu, ""));
+  }
+  return model;
+}
+
+export function chatContent(raw: string): string {
+  const parsed = JSON.parse(raw) as {
+    choices?: Array<{ message?: Record<string, unknown> }>;
+    content?: Array<{ text?: unknown }>;
+  };
+  const anthropicText = parsed.content?.find((part) => typeof part.text === "string")?.text;
+  const message = parsed.choices?.[0]?.message ?? {};
+  const values = [anthropicText, message.content, message.reasoning_content, message.reasoning];
+  return (
+    values
+      .find((value): value is string => typeof value === "string" && value.trim().length > 0)
+      ?.trim() ?? ""
+  );
+}
+
+export async function cleanupHermesSwitch(
+  host: HostCliClient,
+  sandbox: SandboxClient,
+): Promise<void> {
+  await bestEffort(() =>
+    host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
+      artifactName: "cleanup-nemoclaw-destroy",
+      env: env(),
+      timeoutMs: 120_000,
+    }),
+  );
+  await bestEffort(() =>
+    sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
+      artifactName: "cleanup-openshell-delete",
+      env: env(),
+      timeoutMs: 60_000,
+    }),
+  );
+}
+
+export async function installHermes(
+  host: HostCliClient,
+  apiKey: string,
+): Promise<ShellProbeResult> {
+  let install: ShellProbeResult | undefined;
+  for (let attempt = 1; attempt <= INSTALL_ATTEMPTS; attempt += 1) {
+    install = await host.command(
+      "bash",
+      ["install.sh", "--non-interactive", "--yes-i-accept-third-party-software"],
+      {
+        artifactName: attempt === 1 ? "install-hermes" : `install-hermes-attempt-${attempt}`,
+        cwd: REPO_ROOT,
+        env: env(apiKey),
+        redactionValues: [apiKey],
+        timeoutMs: 25 * 60_000,
+      },
+    );
+    const retry =
+      install.exitCode !== 0 &&
+      isTransientProviderValidationFailure(install) &&
+      attempt < INSTALL_ATTEMPTS;
+    install.exitCode === 0 && (attempt = INSTALL_ATTEMPTS + 1);
+    retry && (await new Promise((resolve) => setTimeout(resolve, 10_000 * attempt)));
+    !retry && install.exitCode !== 0 && (attempt = INSTALL_ATTEMPTS + 1);
+  }
+  if (!install) throw new Error("install command did not run");
+  return install;
+}
+
+export async function hermesGatewayPid(
+  sandbox: SandboxClient,
+  artifactName: string,
+): Promise<ShellProbeResult> {
+  return await sandbox.execShell(
+    SANDBOX_NAME,
+    trustedSandboxShellScript(
+      "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
+    ),
+    { artifactName, env: env(), timeoutMs: 30_000 },
+  );
+}
+
+export async function envHash(
+  sandbox: SandboxClient,
+  artifactName: string,
+): Promise<ShellProbeResult> {
+  return await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
+    artifactName,
+    env: env(),
+    timeoutMs: 30_000,
+  });
+}
+
+export function maybeAssertPidStable(
+  before: ShellProbeResult,
+  after: ShellProbeResult,
+  assertStable: (a: string, b: string) => void,
+): void {
+  const beforePid = before.stdout.trim();
+  const afterPid = after.stdout.trim();
+  beforePid && afterPid && assertStable(afterPid, beforePid);
+}
+
+export function expectedBaseUrl(): string {
+  return SWITCH_API === "anthropic-messages"
+    ? "https://inference.local"
+    : "https://inference.local/v1";
+}
+
+export function expectedApiMode(): string | undefined {
+  return new Map<string, string>([
+    ["anthropic-messages", "anthropic_messages"],
+    ["openai-responses", "codex_responses"],
+  ]).get(SWITCH_API);
+}
+
+export async function apiKeyShape(sandbox: SandboxClient): Promise<ShellProbeResult> {
+  return await sandbox.execShell(
+    SANDBOX_NAME,
+    trustedSandboxShellScript(
+      "python3 - <<'PY'\nimport re\ntext=open('/sandbox/.hermes/config.yaml', encoding='utf-8').read()\nmatch=re.search(r'^\\s+api_key:\\s*[\\\"\\']?(sk-[^\\\"\\'\\s]+)', text, re.M)\nraise SystemExit(0 if match else 1)\nPY",
+    ),
+    { artifactName: "hermes-config-api-key-shape", env: env(), timeoutMs: 30_000 },
+  );
+}
+
+export async function hashCheck(
+  sandbox: SandboxClient,
+  file: string,
+  artifact: string,
+): Promise<ShellProbeResult> {
+  return await sandbox.execShell(
+    SANDBOX_NAME,
+    trustedSandboxShellScript(`sha256sum -c ${file} --status && echo OK`),
+    { artifactName: `hermes-${artifact}-hash-check`, env: env(), timeoutMs: 30_000 },
+  );
+}
+
+export async function strictHashPerms(sandbox: SandboxClient): Promise<ShellProbeResult> {
+  return await sandbox.execShell(
+    SANDBOX_NAME,
+    trustedSandboxShellScript("stat -c '%u %a' /etc/nemoclaw/hermes.config-hash"),
+    { artifactName: "hermes-strict-hash-perms", env: env(), timeoutMs: 30_000 },
+  );
+}
+
+export function maybeAssertEnvHashStable(
+  before: ShellProbeResult,
+  after: ShellProbeResult,
+  assertStable: (a: string, b: string) => void,
+): void {
+  const beforeHash = before.stdout.split(/\s+/u)[0] ?? "";
+  const afterHash = after.stdout.split(/\s+/u)[0] ?? "";
+  beforeHash && assertStable(afterHash, beforeHash);
+}
+
+export function registryState(): { registry: Record<string, any>; session: Record<string, any> } {
+  return {
+    registry: JSON.parse(
+      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "sandboxes.json"), "utf8"),
+    ),
+    session: JSON.parse(
+      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "onboard-session.json"), "utf8"),
+    ),
+  };
+}
+
+function quotePayload(payload: string): string {
+  return payload.replace(/'/gu, `'\\''`);
+}
+
+export function inferenceLocalCommand(payload: string): string {
+  return SWITCH_API === "anthropic-messages"
+    ? `curl -sS --max-time 90 https://inference.local/v1/messages -H 'Content-Type: application/json' -H 'anthropic-version: 2023-06-01' -d '${quotePayload(payload)}'`
+    : `curl -sS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${quotePayload(payload)}'`;
+}
+
+export function hermesApiCommand(payload: string): string {
+  return `set -a; [ ! -f /sandbox/.hermes/.env ] || . /sandbox/.hermes/.env; set +a; curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer \${API_SERVER_KEY:-}" -d '${quotePayload(payload)}'`;
+}
diff --git a/test/e2e-scenario/live/hermes-inference-switch.test.ts b/test/e2e-scenario/live/hermes-inference-switch.test.ts
index ef16ab7a8b..b6014f3c42 100644
--- a/test/e2e-scenario/live/hermes-inference-switch.test.ts
+++ b/test/e2e-scenario/live/hermes-inference-switch.test.ts
@@ -3,84 +3,38 @@
 
 /** Live Vitest replacement for test/e2e/test-hermes-inference-switch.sh. */
 
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-
 import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
 import { resultText } from "../fixtures/clients/index.ts";
-import { trustedSandboxShellScript, validateSandboxName } from "../fixtures/clients/sandbox.ts";
+import { trustedSandboxShellScript } from "../fixtures/clients/sandbox.ts";
 import { expect, test } from "../fixtures/e2e-test.ts";
 import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
-import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
-import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
+import {
+  apiKeyShape,
+  CLI,
+  chatContent,
+  cleanupHermesSwitch,
+  env,
+  envHash,
+  expectedApiMode,
+  expectedBaseUrl,
+  hashCheck,
+  hermesApiCommand,
+  hermesGatewayPid,
+  inferenceLocalCommand,
+  installHermes,
+  maybeAssertEnvHashStable,
+  maybeAssertPidStable,
+  parseHermesModelBlock,
+  registryState,
+  SANDBOX_NAME,
+  SWITCH_API,
+  SWITCH_MODEL,
+  SWITCH_PROVIDER,
+  strictHashPerms,
+} from "./hermes-inference-switch-helpers.ts";
 
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js");
-const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-hermes-inference-switch";
-validateSandboxName(SANDBOX_NAME);
-const SWITCH_PROVIDER = process.env.NEMOCLAW_SWITCH_PROVIDER ?? "nvidia-prod";
-const SWITCH_MODEL = process.env.NEMOCLAW_SWITCH_MODEL ?? "z-ai/glm-5.1";
-const SWITCH_API = process.env.NEMOCLAW_SWITCH_INFERENCE_API ?? "openai-completions";
-const INSTALL_ATTEMPTS = process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true" ? 3 : 1;
 const TIMEOUT_MS = 45 * 60_000;
 
-function env(apiKey?: string): NodeJS.ProcessEnv {
-  const out: NodeJS.ProcessEnv = {
-    ...buildAvailabilityProbeEnv(),
-    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
-    NEMOCLAW_AGENT: "hermes",
-    NEMOCLAW_NON_INTERACTIVE: "1",
-    NEMOCLAW_RECREATE_SANDBOX: "1",
-    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
-    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
-  };
-  if (apiKey) {
-    out.NVIDIA_INFERENCE_API_KEY = apiKey;
-    out.NVIDIA_API_KEY = apiKey;
-  }
-  return out;
-}
-
-async function bestEffort(run: () => Promise<unknown>): Promise<void> {
-  try {
-    await run();
-  } catch {}
-}
-
-function parseHermesModelBlock(text: string): Record<string, string> {
-  const model: Record<string, string> = {};
-  let inModel = false;
-  for (const line of text.split(/\r?\n/u)) {
-    if (/^model:\s*$/u.test(line)) {
-      inModel = true;
-      continue;
-    }
-    if (inModel && /^[A-Za-z0-9_-]+:/u.test(line)) break;
-    if (!inModel) continue;
-    const match = line.match(/^\s+([A-Za-z0-9_-]+):\s*(.*?)\s*$/u);
-    if (!match) continue;
-    const value = match[2].replace(/^['"]|['"]$/gu, "");
-    model[match[1]] = value;
-  }
-  return model;
-}
-
-function chatContent(raw: string): string {
-  const parsed = JSON.parse(raw) as {
-    choices?: Array<{ message?: Record<string, unknown> }>;
-    content?: Array<{ text?: unknown }>;
-  };
-  const anthropicText = parsed.content?.find((part) => typeof part.text === "string")?.text;
-  if (typeof anthropicText === "string" && anthropicText.trim()) return anthropicText.trim();
-  const message = parsed.choices?.[0]?.message ?? {};
-  for (const key of ["content", "reasoning_content", "reasoning"]) {
-    const value = message[key];
-    if (typeof value === "string" && value.trim()) return value.trim();
-  }
-  return "";
-}
-
 test.skipIf(!shouldRunLiveE2EScenarios())(
   "Hermes inference set updates route/config and preserves live runtime",
   { timeout: TIMEOUT_MS },
@@ -96,37 +50,10 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
       switchApi: SWITCH_API,
     });
 
-    cleanup.add("destroy Hermes inference switch sandbox", async () => {
-      await bestEffort(() =>
-        host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
-          artifactName: "cleanup-nemoclaw-destroy",
-          env: env(),
-          timeoutMs: 120_000,
-        }),
-      );
-      await bestEffort(() =>
-        sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
-          artifactName: "cleanup-openshell-delete",
-          env: env(),
-          timeoutMs: 60_000,
-        }),
-      );
-    });
-
-    await bestEffort(() =>
-      host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
-        artifactName: "pre-cleanup-destroy",
-        env: env(),
-        timeoutMs: 120_000,
-      }),
-    );
-    await bestEffort(() =>
-      sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
-        artifactName: "pre-cleanup-delete",
-        env: env(),
-        timeoutMs: 60_000,
-      }),
+    cleanup.add("destroy Hermes inference switch sandbox", () =>
+      cleanupHermesSwitch(host, sandbox),
     );
+    await cleanupHermesSwitch(host, sandbox);
 
     const docker = await host.command("docker", ["info"], {
       artifactName: "docker-info",
@@ -135,41 +62,11 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     });
     expect(docker.exitCode, resultText(docker)).toBe(0);
 
-    let install: ShellProbeResult | undefined;
-    for (let attempt = 1; attempt <= INSTALL_ATTEMPTS; attempt += 1) {
-      install = await host.command(
-        "bash",
-        ["install.sh", "--non-interactive", "--yes-i-accept-third-party-software"],
-        {
-          artifactName: attempt === 1 ? "install-hermes" : `install-hermes-attempt-${attempt}`,
-          cwd: REPO_ROOT,
-          env: env(apiKey),
-          redactionValues: [apiKey],
-          timeoutMs: 25 * 60_000,
-        },
-      );
-      if (install.exitCode === 0) break;
-      if (isTransientProviderValidationFailure(install) && attempt < INSTALL_ATTEMPTS) {
-        await new Promise((resolve) => setTimeout(resolve, 10_000 * attempt));
-        continue;
-      }
-      break;
-    }
-    expect(install, "install command must run").toBeDefined();
-    expect(install?.exitCode, resultText(install as ShellProbeResult)).toBe(0);
+    const install = await installHermes(host, apiKey);
+    expect(install.exitCode, resultText(install)).toBe(0);
 
-    const pidBefore = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(
-        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
-      ),
-      { artifactName: "pid-before", env: env(), timeoutMs: 30_000 },
-    );
-    const envHashBefore = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
-      artifactName: "env-hash-before",
-      env: env(),
-      timeoutMs: 30_000,
-    });
+    const pidBefore = await hermesGatewayPid(sandbox, "pid-before");
+    const envHashBefore = await envHash(sandbox, "env-hash-before");
 
     const switched = await host.command(
       "node",
@@ -183,15 +80,8 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     );
     expect(switched.exitCode, resultText(switched)).toBe(0);
 
-    const pidAfter = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(
-        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
-      ),
-      { artifactName: "pid-after", env: env(), timeoutMs: 30_000 },
-    );
-    if (pidBefore.stdout.trim() && pidAfter.stdout.trim())
-      expect(pidAfter.stdout.trim()).toBe(pidBefore.stdout.trim());
+    const pidAfter = await hermesGatewayPid(sandbox, "pid-after");
+    maybeAssertPidStable(pidBefore, pidAfter, (actual, expected) => expect(actual).toBe(expected));
 
     const health = await sandbox.exec(
       SANDBOX_NAME,
@@ -220,78 +110,44 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     const model = parseHermesModelBlock(config.stdout);
     expect(model.default).toBe(SWITCH_MODEL);
     expect(model.provider).toBe("custom");
-    expect(model.base_url).toBe(
-      SWITCH_API === "anthropic-messages"
-        ? "https://inference.local"
-        : "https://inference.local/v1",
-    );
-    if (SWITCH_API === "anthropic-messages") expect(model.api_mode).toBe("anthropic_messages");
-    else if (SWITCH_API === "openai-responses") expect(model.api_mode).toBe("codex_responses");
-    else expect(model.api_mode).toBeUndefined();
-    const apiKeyShape = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript(
-        "python3 - <<'PY'\nimport re\ntext=open('/sandbox/.hermes/config.yaml', encoding='utf-8').read()\nmatch=re.search(r'^\\s+api_key:\\s*[\\\"\\']?(sk-[^\\\"\\'\\s]+)', text, re.M)\nraise SystemExit(0 if match else 1)\nPY",
-      ),
-      { artifactName: "hermes-config-api-key-shape", env: env(), timeoutMs: 30_000 },
-    );
-    expect(apiKeyShape.exitCode, resultText(apiKeyShape)).toBe(0);
+    expect(model.base_url).toBe(expectedBaseUrl());
+    expect(model.api_mode).toBe(expectedApiMode());
+    expect((await apiKeyShape(sandbox)).exitCode).toBe(0);
     expect(config.stdout).not.toMatch(/^models:\s*$/mu);
 
-    for (const [file, artifact] of [
-      ["/etc/nemoclaw/hermes.config-hash", "strict"],
-      ["/sandbox/.hermes/.config-hash", "compat"],
-    ] as const) {
-      const hash = await sandbox.execShell(
-        SANDBOX_NAME,
-        trustedSandboxShellScript(`sha256sum -c ${file} --status && echo OK`),
-        { artifactName: `hermes-${artifact}-hash-check`, env: env(), timeoutMs: 30_000 },
-      );
-      expect(hash.exitCode, resultText(hash)).toBe(0);
-      expect(hash.stdout).toContain("OK");
-    }
-    const strictHashPerms = await sandbox.execShell(
-      SANDBOX_NAME,
-      trustedSandboxShellScript("stat -c '%u %a' /etc/nemoclaw/hermes.config-hash"),
-      { artifactName: "hermes-strict-hash-perms", env: env(), timeoutMs: 30_000 },
+    const strictHash = await hashCheck(sandbox, "/etc/nemoclaw/hermes.config-hash", "strict");
+    expect(strictHash.exitCode, resultText(strictHash)).toBe(0);
+    expect(strictHash.stdout).toContain("OK");
+    const compatHash = await hashCheck(sandbox, "/sandbox/.hermes/.config-hash", "compat");
+    expect(compatHash.exitCode, resultText(compatHash)).toBe(0);
+    expect(compatHash.stdout).toContain("OK");
+    const strictPerms = await strictHashPerms(sandbox);
+    expect(strictPerms.stdout.trim()).toMatch(/^0\s+[0-7]+$/u);
+    expect(Number.parseInt(strictPerms.stdout.trim().split(/\s+/u)[1], 8) & 0o222).toBe(0);
+
+    maybeAssertEnvHashStable(
+      envHashBefore,
+      await envHash(sandbox, "env-hash-after"),
+      (actual, expected) => expect(actual).toBe(expected),
     );
-    expect(strictHashPerms.stdout.trim()).toMatch(/^0\s+[0-7]+$/u);
-    expect(Number.parseInt(strictHashPerms.stdout.trim().split(/\s+/u)[1], 8) & 0o222).toBe(0);
-
-    const envHashAfter = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
-      artifactName: "env-hash-after",
-      env: env(),
-      timeoutMs: 30_000,
-    });
-    if (envHashBefore.stdout.trim())
-      expect(envHashAfter.stdout.split(/\s+/u)[0]).toBe(envHashBefore.stdout.split(/\s+/u)[0]);
 
-    const registry = JSON.parse(
-      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "sandboxes.json"), "utf8"),
-    );
-    expect(registry.sandboxes?.[SANDBOX_NAME]?.agent).toBe("hermes");
-    expect(registry.sandboxes?.[SANDBOX_NAME]?.provider).toBe(SWITCH_PROVIDER);
-    expect(registry.sandboxes?.[SANDBOX_NAME]?.model).toBe(SWITCH_MODEL);
-    const session = JSON.parse(
-      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "onboard-session.json"), "utf8"),
-    );
-    expect(session.sandboxName).toBe(SANDBOX_NAME);
-    expect(session.agent).toBe("hermes");
-    expect(session.provider).toBe(SWITCH_PROVIDER);
-    expect(session.model).toBe(SWITCH_MODEL);
+    const state = registryState();
+    expect(state.registry.sandboxes?.[SANDBOX_NAME]?.agent).toBe("hermes");
+    expect(state.registry.sandboxes?.[SANDBOX_NAME]?.provider).toBe(SWITCH_PROVIDER);
+    expect(state.registry.sandboxes?.[SANDBOX_NAME]?.model).toBe(SWITCH_MODEL);
+    expect(state.session.sandboxName).toBe(SANDBOX_NAME);
+    expect(state.session.agent).toBe("hermes");
+    expect(state.session.provider).toBe(SWITCH_PROVIDER);
+    expect(state.session.model).toBe(SWITCH_MODEL);
 
     const inferenceLocalPayload = JSON.stringify({
       model: SWITCH_MODEL,
       messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
       max_tokens: 100,
     });
-    const inferenceLocalCommand =
-      SWITCH_API === "anthropic-messages"
-        ? `curl -sS --max-time 90 https://inference.local/v1/messages -H 'Content-Type: application/json' -H 'anthropic-version: 2023-06-01' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`
-        : `curl -sS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`;
     const inferenceLocal = await sandbox.execShell(
       SANDBOX_NAME,
-      trustedSandboxShellScript(inferenceLocalCommand),
+      trustedSandboxShellScript(inferenceLocalCommand(inferenceLocalPayload)),
       {
         artifactName: "hermes-inference-local-chat-after-switch",
         env: env(),
@@ -302,16 +158,9 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     expect(inferenceLocal.exitCode, resultText(inferenceLocal)).toBe(0);
     expect(chatContent(inferenceLocal.stdout)).toMatch(/PONG/i);
 
-    const payload = JSON.stringify({
-      model: SWITCH_MODEL,
-      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
-      max_tokens: 100,
-    });
     const chat = await sandbox.execShell(
       SANDBOX_NAME,
-      trustedSandboxShellScript(
-        `set -a; [ ! -f /sandbox/.hermes/.env ] || . /sandbox/.hermes/.env; set +a; curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer \${API_SERVER_KEY:-}" -d '${payload.replace(/'/gu, `'\\''`)}'`,
-      ),
+      trustedSandboxShellScript(hermesApiCommand(inferenceLocalPayload)),
       {
         artifactName: "hermes-api-chat-after-switch",
         env: env(),

From 09b04df31010a0fbe3c240b817396aeeb11649f9 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Sat, 20 Jun 2026 11:07:08 -0700
Subject: [PATCH 09/10] Apply suggestions from code review

Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com>
---
 test/e2e-scenario/live/hermes-inference-switch-helpers.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/e2e-scenario/live/hermes-inference-switch-helpers.ts b/test/e2e-scenario/live/hermes-inference-switch-helpers.ts
index 865b5f9124..baf15a54d0 100644
--- a/test/e2e-scenario/live/hermes-inference-switch-helpers.ts
+++ b/test/e2e-scenario/live/hermes-inference-switch-helpers.ts
@@ -7,7 +7,6 @@ import path from "node:path";
 
 import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
 import type { HostCliClient } from "../fixtures/clients/host.ts";
-import { resultText } from "../fixtures/clients/index.ts";
 import {
   type SandboxClient,
   trustedSandboxShellScript,

From 6258816df5bb864a9f928711439efc877087cfe1 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Sat, 20 Jun 2026 12:39:41 -0700
Subject: [PATCH 10/10] test(e2e): add compatible Anthropic Hermes switch setup

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../live/hermes-inference-switch-helpers.ts   | 165 +++++++++++++++++-
 .../live/hermes-inference-switch.test.ts      |   2 +
 2 files changed, 165 insertions(+), 2 deletions(-)

diff --git a/test/e2e-scenario/live/hermes-inference-switch-helpers.ts b/test/e2e-scenario/live/hermes-inference-switch-helpers.ts
index baf15a54d0..1c8e7272f7 100644
--- a/test/e2e-scenario/live/hermes-inference-switch-helpers.ts
+++ b/test/e2e-scenario/live/hermes-inference-switch-helpers.ts
@@ -2,6 +2,8 @@
 // SPDX-License-Identifier: Apache-2.0
 
 import fs from "node:fs";
+import http, { type Server } from "node:http";
+import type { AddressInfo } from "node:net";
 import os from "node:os";
 import path from "node:path";
 
@@ -12,6 +14,7 @@ import {
   trustedSandboxShellScript,
   validateSandboxName,
 } from "../fixtures/clients/sandbox.ts";
+import { expect } from "../fixtures/e2e-test.ts";
 import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
 import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
 
@@ -22,9 +25,16 @@ validateSandboxName(SANDBOX_NAME);
 export const SWITCH_PROVIDER = process.env.NEMOCLAW_SWITCH_PROVIDER ?? "nvidia-prod";
 export const SWITCH_MODEL = process.env.NEMOCLAW_SWITCH_MODEL ?? "z-ai/glm-5.1";
 export const SWITCH_API = process.env.NEMOCLAW_SWITCH_INFERENCE_API ?? "openai-completions";
+const SWITCH_MOCK_ANTHROPIC = process.env.NEMOCLAW_SWITCH_MOCK_ANTHROPIC ?? "0"; // "1" starts the local mock
+const SWITCH_MOCK_PORT = Number.parseInt(process.env.NEMOCLAW_SWITCH_MOCK_PORT ?? "0", 10); // mock listen port
 const INSTALL_ATTEMPTS = process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true" ? 3 : 1;
 
-export function env(apiKey?: string): NodeJS.ProcessEnv {
+interface MockAnthropicProvider {
+  endpointUrl: string; // URL the mock is advertised under; used as the provider base URL
+  close(): Promise<void>; // shuts the mock's HTTP server down
+}
+
+export function env(apiKey?: string, extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
   const out: NodeJS.ProcessEnv = {
     ...buildAvailabilityProbeEnv(),
     NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
@@ -35,7 +45,7 @@ export function env(apiKey?: string): NodeJS.ProcessEnv {
     OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
   };
   apiKey && Object.assign(out, { NVIDIA_INFERENCE_API_KEY: apiKey, NVIDIA_API_KEY: apiKey });
-  return out;
+  return { ...out, ...extra };
 }
 
 export async function bestEffort(run: () => Promise<unknown>): Promise<void> {
@@ -93,6 +103,157 @@ export async function cleanupHermesSwitch(
   );
 }
 
+/** Writes `payload` as a JSON body with the given status and an exact `content-length`. */
+function jsonResponse(res: http.ServerResponse, status: number, payload: unknown): void {
+  const serialized = JSON.stringify(payload);
+  const byteLength = Buffer.byteLength(serialized);
+  const headers = { "content-type": "application/json", "content-length": byteLength };
+  res.writeHead(status, headers);
+  res.end(serialized);
+}
+
+function sseResponse(res: http.ServerResponse, events: Array<[string, unknown]>): void {
+  res.writeHead(200, { "content-type": "text/event-stream", "cache-control": "no-cache" });
+  events.forEach(([eventName, data]) => {
+    res.write(`event: ${eventName}\n`); // one SSE frame: `event:` line, then `data:` line
+    res.write(`data: ${JSON.stringify(data)}\n\n`); // the blank line ends the frame
+  });
+  res.end();
+}
+
+// Promise wrapper around `server.close()`: resolves when the close callback fires without
+// an error and rejects with that error otherwise.
+function closeServer(server: Server): Promise<void> {
+  return new Promise<void>((done, fail) => server.close((err) => (err ? fail(err) : done())));
+}
+
+/**
+ * Starts an in-process HTTP stub of an Anthropic-compatible provider for the switch scenario.
+ *
+ * Routes: `GET /health`, `GET /v1/models` (and `/v1/models/mock-anthropic-model`), and
+ * `POST /v1/messages`, which always answers "PONG" as JSON, or as SSE when `stream` is true.
+ * Anything else is a 404. Binds `SWITCH_MOCK_PORT` on 0.0.0.0 (port 0 lets the OS choose).
+ *
+ * @returns the `host.openshell.internal` URL for the bound port and a `close()` handle.
+ * @throws when listening fails or the server does not report a TCP address.
+ */
+async function startMockAnthropicProvider(): Promise<MockAnthropicProvider> {
+  // Every stream event carries its own name as `type`, so build both from one argument.
+  const event = (type: string, fields: object): [string, unknown] => [type, { type, ...fields }];
+  const server = http.createServer((req, res) => {
+    const url = new URL(req.url ?? "/", "http://mock.local");
+    if (req.method === "GET" && url.pathname === "/health")
+      return jsonResponse(res, 200, { ok: true });
+    if (
+      req.method === "GET" &&
+      ["/v1/models", "/v1/models/mock-anthropic-model"].includes(url.pathname)
+    ) {
+      return jsonResponse(res, 200, { data: [{ id: "mock-anthropic-model" }] });
+    }
+    if (req.method !== "POST" || url.pathname !== "/v1/messages") {
+      return jsonResponse(res, 404, { error: "not found", path: url.pathname });
+    }
+    let raw = "";
+    req.setEncoding("utf8");
+    req.on("data", (chunk) => {
+      raw += chunk;
+    });
+    req.on("end", () => {
+      // A throw inside this listener has no caller to catch it and surfaces as an uncaught
+      // exception in the test process, so a malformed or non-object body gets a 400 instead.
+      let parsed: unknown;
+      try {
+        parsed = JSON.parse(raw || "{}");
+      } catch {
+        parsed = undefined;
+      }
+      if (typeof parsed !== "object" || parsed === null) {
+        return jsonResponse(res, 400, { error: "invalid JSON body" });
+      }
+      const payload = parsed as { model?: unknown; stream?: unknown };
+      const model = typeof payload.model === "string" ? payload.model : "mock-anthropic-model";
+      const message = { id: "msg_mock", type: "message", role: "assistant", model };
+      if (payload.stream === true) {
+        const usage = { input_tokens: 1, output_tokens: 0 };
+        const opening = { ...message, content: [], stop_reason: null, stop_sequence: null, usage };
+        return sseResponse(res, [
+          event("message_start", { message: opening }),
+          event("content_block_start", { index: 0, content_block: { type: "text", text: "" } }),
+          event("content_block_delta", { index: 0, delta: { type: "text_delta", text: "PONG" } }),
+          event("content_block_stop", { index: 0 }),
+          event("message_delta", {
+            delta: { stop_reason: "end_turn", stop_sequence: null },
+            usage: { output_tokens: 1 },
+          }),
+          event("message_stop", {}),
+        ]);
+      }
+      return jsonResponse(res, 200, {
+        ...message,
+        content: [{ type: "text", text: "PONG" }],
+        stop_reason: "end_turn",
+        usage: { input_tokens: 1, output_tokens: 1 },
+      });
+    });
+  });
+  // Bind failures (for example a port already in use) arrive as "error" events; turn them into
+  // a rejection and drop that listener once the server is actually listening.
+  await new Promise<void>((resolve, reject) => {
+    server.once("error", reject);
+    server.listen(SWITCH_MOCK_PORT, "0.0.0.0", () => {
+      server.off("error", reject);
+      resolve();
+    });
+  });
+  const address = server.address();
+  if (!address || typeof address === "string") {
+    await closeServer(server);
+    throw new Error("mock Anthropic provider did not expose a TCP port");
+  }
+  return {
+    endpointUrl: `http://host.openshell.internal:${(address as AddressInfo).port}`,
+    close: () => closeServer(server),
+  };
+}
+
+/**
+ * Creates or updates the `compatible-anthropic-endpoint` provider ahead of the switch. Does
+ * nothing unless the scenario targets that provider with the `anthropic-messages` API. With
+ * `NEMOCLAW_SWITCH_MOCK_ANTHROPIC` set to "1" a local mock is started and its shutdown is queued
+ * on `cleanup`; `NEMOCLAW_SWITCH_ENDPOINT_URL`, when set, wins over the mock's URL.
+ */
+export async function ensureCompatibleAnthropicSwitchProvider(
+  host: HostCliClient,
+  cleanup: { add(name: string, run: () => Promise<void> | void): void },
+): Promise<void> {
+  const targeted =
+    SWITCH_PROVIDER === "compatible-anthropic-endpoint" && SWITCH_API === "anthropic-messages";
+  if (!targeted) return;
+  const stub = SWITCH_MOCK_ANTHROPIC === "1" ? await startMockAnthropicProvider() : undefined;
+  if (stub) cleanup.add("close compatible Anthropic switch mock", () => stub.close());
+  const providerUrl = process.env.NEMOCLAW_SWITCH_ENDPOINT_URL ?? stub?.endpointUrl ?? "";
+  const credential = process.env.COMPATIBLE_ANTHROPIC_API_KEY ?? "test-compatible-anthropic-key";
+  const required = "is required for compatible Anthropic inference switches";
+  expect(providerUrl, `NEMOCLAW_SWITCH_ENDPOINT_URL ${required}`).not.toBe("");
+  expect(credential, `COMPATIBLE_ANTHROPIC_API_KEY ${required}`).not.toBe("");
+  const registerScript = [
+    "set -euo pipefail",
+    "if openshell provider get -g nemoclaw compatible-anthropic-endpoint >/dev/null 2>&1; then",
+    '  openshell provider update -g nemoclaw compatible-anthropic-endpoint --credential COMPATIBLE_ANTHROPIC_API_KEY --config "ANTHROPIC_BASE_URL=${SWITCH_ENDPOINT_URL}"',
+    "else",
+    '  openshell provider create -g nemoclaw --name compatible-anthropic-endpoint --type anthropic --credential COMPATIBLE_ANTHROPIC_API_KEY --config "ANTHROPIC_BASE_URL=${SWITCH_ENDPOINT_URL}"',
+    "fi",
+  ].join("\n");
+  const extra = { COMPATIBLE_ANTHROPIC_API_KEY: credential, SWITCH_ENDPOINT_URL: providerUrl };
+  const { exitCode } = await host.command("bash", ["-lc", registerScript], {
+    artifactName: "register-compatible-anthropic-switch-provider",
+    env: env(undefined, extra),
+    redactionValues: [credential],
+    timeoutMs: 120_000,
+  });
+  expect(exitCode).toBe(0);
+}
+
 export async function installHermes(
   host: HostCliClient,
   apiKey: string,
diff --git a/test/e2e-scenario/live/hermes-inference-switch.test.ts b/test/e2e-scenario/live/hermes-inference-switch.test.ts
index b6014f3c42..b57ca4652d 100644
--- a/test/e2e-scenario/live/hermes-inference-switch.test.ts
+++ b/test/e2e-scenario/live/hermes-inference-switch.test.ts
@@ -13,6 +13,7 @@ import {
   CLI,
   chatContent,
   cleanupHermesSwitch,
+  ensureCompatibleAnthropicSwitchProvider,
   env,
   envHash,
   expectedApiMode,
@@ -64,6 +65,7 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
 
     const install = await installHermes(host, apiKey);
     expect(install.exitCode, resultText(install)).toBe(0);
+    await ensureCompatibleAnthropicSwitchProvider(host, cleanup);
 
     const pidBefore = await hermesGatewayPid(sandbox, "pid-before");
     const envHashBefore = await envHash(sandbox, "env-hash-before");