# Job added to .github/workflows/e2e-vitest-scenarios.yaml (under `jobs:`).
  #
  # Runs the live Hermes inference-switch Vitest scenario as a free-standing job.
  # Selection: runs when no job/scenario filter is given, or when the
  # comma-separated `inputs.jobs` / `inputs.scenarios` filter names this job or
  # its scenario id.
  #
  # NOTE(review): the same diff also adds `hermes-inference-switch-vitest` (and
  # `ollama-auth-proxy-vitest`) to a job list near the end of the workflow,
  # right after `cloud-inference-vitest` / `brave-search-vitest` — presumably the
  # aggregate job's `needs:`; keep that list in sync with this job id.
  hermes-inference-switch-vitest:
    needs: generate-matrix
    if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',hermes-inference-switch-vitest,') || contains(format(',{0},', inputs.scenarios), ',hermes-inference-switch,') }}
    runs-on: ubuntu-latest
    # The test itself allows 45 minutes; the extra headroom covers checkout,
    # dependency install, and the CLI build.
    timeout-minutes: 55
    env:
      FREE_STANDING_VITEST_JOB: "1"
      FREE_STANDING_SCENARIO_ID: "hermes-inference-switch"
      E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/hermes-inference-switch
      NEMOCLAW_CLI_BIN: ${{ github.workspace }}/bin/nemoclaw.js
      NEMOCLAW_RUN_E2E_SCENARIOS: "1"
      NEMOCLAW_NON_INTERACTIVE: "1"
      NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
      NEMOCLAW_AGENT: "hermes"
      NEMOCLAW_SANDBOX_NAME: "e2e-hermes-inference-switch"
      OPENSHELL_GATEWAY: "nemoclaw"
    steps:
      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
        with:
          persist-credentials: false
      - name: Set up Node
        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
        with:
          node-version: 22
          cache: npm
      - name: Install root dependencies
        run: npm ci --ignore-scripts
      - name: Build CLI
        run: npm run build:cli
      - name: Install OpenShell CLI
        run: bash scripts/install-openshell.sh
      - name: Run Hermes inference switch live Vitest test
        env:
          # Scoped to this step so the secret is not exposed to the setup steps.
          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
        run: |
          set -euo pipefail
          export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"
          # Resolve the binary separately from `export` so a lookup failure is
          # not masked, and fail fast instead of exporting an empty path that
          # would only surface as an obscure error deep inside the test.
          OPENSHELL_BIN="$(command -v openshell || true)"
          if [ -z "$OPENSHELL_BIN" ]; then
            echo "::error::openshell was not found on PATH after scripts/install-openshell.sh" >&2
            exit 1
          fi
          export OPENSHELL_BIN
          npx vitest run --project e2e-scenarios-live test/e2e-scenario/live/hermes-inference-switch.test.ts --silent=false --reporter=default
      - name: Upload Hermes inference switch artifacts
        # Upload even when the test fails: the artifacts are the debugging trail.
        if: always()
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: e2e-vitest-scenarios-hermes-inference-switch
          path: e2e-artifacts/vitest/hermes-inference-switch/
          include-hidden-files: false
          if-no-files-found: ignore
          retention-days: 14
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

/**
 * Helpers for the live Hermes inference-switch scenario: environment assembly,
 * sandbox probes, an optional in-process mock Anthropic endpoint, and parsers
 * for the Hermes config and chat responses.
 */

import fs from "node:fs";
import http, { type Server } from "node:http";
import type { AddressInfo } from "node:net";
import os from "node:os";
import path from "node:path";

import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
import type { HostCliClient } from "../fixtures/clients/host.ts";
import {
  type SandboxClient,
  trustedSandboxShellScript,
  validateSandboxName,
} from "../fixtures/clients/sandbox.ts";
import { expect } from "../fixtures/e2e-test.ts";
import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";

/** Repository root, resolved relative to this file. */
export const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
/** Path of the CLI entry point that the scenario runs with `node`. */
export const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js");
/** Sandbox used by the scenario; overridable through `NEMOCLAW_SANDBOX_NAME`. */
export const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-hermes-inference-switch";
// Runs at import time, before the name is interpolated into any command.
// NOTE(review): presumably throws on an invalid name — confirm in fixtures/clients/sandbox.ts.
validateSandboxName(SANDBOX_NAME);
/** Provider passed to `inference set --provider`. */
export const SWITCH_PROVIDER = process.env.NEMOCLAW_SWITCH_PROVIDER ?? "nvidia-prod";
/** Model passed to `inference set --model`. */
export const SWITCH_MODEL = process.env.NEMOCLAW_SWITCH_MODEL ?? "z-ai/glm-5.1";
/** API flavour; selects the expected base URL, api_mode, and probe endpoint. */
export const SWITCH_API = process.env.NEMOCLAW_SWITCH_INFERENCE_API ?? "openai-completions";
const SWITCH_MOCK_ANTHROPIC = process.env.NEMOCLAW_SWITCH_MOCK_ANTHROPIC ?? "0";
const SWITCH_MOCK_PORT = Number.parseInt(process.env.NEMOCLAW_SWITCH_MOCK_PORT ?? "0", 10);
// Retry the install only on CI.
const INSTALL_ATTEMPTS = process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true" ? 3 : 1;
const MOCK_ANTHROPIC_MODEL = "mock-anthropic-model";

/** Handle on the in-process mock Anthropic endpoint. */
interface MockAnthropicProvider {
  /** URL registered as the provider's `ANTHROPIC_BASE_URL`. */
  endpointUrl: string;
  /** Stops the HTTP server. */
  close(): Promise<void>;
}

/** Subset of OpenAI-style and Anthropic-style response bodies read by {@link chatContent}. */
interface ChatResponseBody {
  choices?: Array<{ message?: Record<string, unknown> } | null>;
  content?: Array<{ text?: unknown } | null>;
}

/** Per-sandbox fields the scenario reads from `~/.nemoclaw/sandboxes.json`. */
interface SandboxRegistryEntry {
  agent?: unknown;
  provider?: unknown;
  model?: unknown;
  [key: string]: unknown;
}

/** Shape read from `~/.nemoclaw/sandboxes.json`. */
interface RegistrySnapshot {
  sandboxes?: Record<string, SandboxRegistryEntry | undefined>;
  [key: string]: unknown;
}

/** Shape read from `~/.nemoclaw/onboard-session.json`. */
interface SessionSnapshot {
  sandboxName?: unknown;
  agent?: unknown;
  provider?: unknown;
  model?: unknown;
  [key: string]: unknown;
}

/**
 * Builds the environment for host and sandbox commands.
 *
 * @param apiKey - When non-empty, exported as both `NVIDIA_INFERENCE_API_KEY`
 *   and `NVIDIA_API_KEY`.
 * @param extra - Entries applied last, overriding everything else.
 * @returns A fresh environment object.
 */
export function env(apiKey?: string, extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
  const base: NodeJS.ProcessEnv = {
    ...buildAvailabilityProbeEnv(),
    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
    NEMOCLAW_AGENT: "hermes",
    NEMOCLAW_NON_INTERACTIVE: "1",
    NEMOCLAW_RECREATE_SANDBOX: "1",
    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
  };
  const credentials: NodeJS.ProcessEnv = apiKey
    ? { NVIDIA_INFERENCE_API_KEY: apiKey, NVIDIA_API_KEY: apiKey }
    : {};
  return { ...base, ...credentials, ...extra };
}

/**
 * Runs `run` and deliberately ignores any rejection. Used for cleanup steps
 * that must not mask the real test outcome.
 */
export async function bestEffort(run: () => Promise<unknown>): Promise<void> {
  try {
    await run();
  } catch {
    // Intentionally ignored: cleanup is best-effort.
  }
}

/**
 * Extracts the indented `key: value` pairs under the top-level `model:` key.
 *
 * This is a minimal line scanner, not a YAML parser. It starts at a line that
 * is exactly `model:`, stops at the next top-level key, and strips one leading
 * and one trailing quote character from each value.
 *
 * @param text - Contents of the Hermes `config.yaml`.
 * @returns The collected pairs; empty when there is no `model:` block.
 */
export function parseHermesModelBlock(text: string): Record<string, string> {
  const model: Record<string, string> = {};
  let inModel = false;
  for (const line of text.split(/\r?\n/u)) {
    if (/^model:\s*$/u.test(line)) {
      inModel = true;
      continue;
    }
    if (!inModel) continue;
    // Any other top-level key ends the block.
    if (/^[A-Za-z0-9_-]+:/u.test(line)) break;
    const match = line.match(/^\s+([A-Za-z0-9_-]+):\s*(.*?)\s*$/u);
    if (!match) continue;
    const [, key, value] = match;
    if (key !== undefined && value !== undefined) {
      model[key] = value.replace(/^['"]|['"]$/gu, "");
    }
  }
  return model;
}

/**
 * Extracts the assistant text from a chat response body.
 *
 * Tried in order: the first Anthropic-style `content[].text` string, then the
 * first choice's `message.content`, `message.reasoning_content`, and
 * `message.reasoning`. The first non-blank string wins.
 *
 * @param raw - Raw response body, as printed by `curl`.
 * @returns The trimmed text, or `""` when no candidate is a non-blank string.
 * @throws SyntaxError when `raw` is not valid JSON; the message includes a
 *   short snippet of the body so the failure is diagnosable.
 */
export function chatContent(raw: string): string {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new SyntaxError(`chat response is not valid JSON (${reason}): ${raw.slice(0, 200)}`);
  }
  if (typeof parsed !== "object" || parsed === null) return "";
  const body = parsed as ChatResponseBody;
  // Guard against non-array fields so an error-shaped body yields "" rather than a TypeError.
  const parts = Array.isArray(body.content) ? body.content : [];
  const choices = Array.isArray(body.choices) ? body.choices : [];
  const anthropicText = parts.find((part) => typeof part?.text === "string")?.text;
  const message = choices[0]?.message ?? {};
  const candidates = [
    anthropicText,
    message.content,
    message.reasoning_content,
    message.reasoning,
  ];
  const text = candidates.find(
    (value): value is string => typeof value === "string" && value.trim().length > 0,
  );
  return text?.trim() ?? "";
}

/**
 * Removes the scenario sandbox: `nemoclaw <name> destroy --yes`, then
 * `openshell sandbox delete <name>`. Both steps are best-effort, so this is
 * safe to call when the sandbox does not exist.
 */
export async function cleanupHermesSwitch(
  host: HostCliClient,
  sandbox: SandboxClient,
): Promise<void> {
  await bestEffort(() =>
    host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
      artifactName: "cleanup-nemoclaw-destroy",
      env: env(),
      timeoutMs: 120_000,
    }),
  );
  await bestEffort(() =>
    sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
      artifactName: "cleanup-openshell-delete",
      env: env(),
      timeoutMs: 60_000,
    }),
  );
}

/** Writes `payload` as a JSON response with an explicit content length. */
function jsonResponse(res: http.ServerResponse, status: number, payload: unknown): void {
  const body = JSON.stringify(payload);
  res.writeHead(status, {
    "content-type": "application/json",
    "content-length": Buffer.byteLength(body),
  });
  res.end(body);
}

/** Writes `events` as a server-sent-event stream and ends the response. */
function sseResponse(res: http.ServerResponse, events: Array<[string, unknown]>): void {
  res.writeHead(200, { "content-type": "text/event-stream", "cache-control": "no-cache" });
  for (const [name, payload] of events) {
    res.write(`event: ${name}\n`);
    res.write(`data: ${JSON.stringify(payload)}\n\n`);
  }
  res.end();
}

/** Promise wrapper around `server.close()`. */
function closeServer(server: Server): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    server.close((error) => (error ? reject(error) : resolve()));
  });
}

/** Builds the fixed streaming event sequence that answers "PONG" for `model`. */
function mockAnthropicStreamEvents(model: string): Array<[string, unknown]> {
  return [
    [
      "message_start",
      {
        type: "message_start",
        message: {
          id: "msg_mock",
          type: "message",
          role: "assistant",
          model,
          content: [],
          stop_reason: null,
          stop_sequence: null,
          usage: { input_tokens: 1, output_tokens: 0 },
        },
      },
    ],
    [
      "content_block_start",
      { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } },
    ],
    [
      "content_block_delta",
      { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "PONG" } },
    ],
    ["content_block_stop", { type: "content_block_stop", index: 0 }],
    [
      "message_delta",
      {
        type: "message_delta",
        delta: { stop_reason: "end_turn", stop_sequence: null },
        usage: { output_tokens: 1 },
      },
    ],
    ["message_stop", { type: "message_stop" }],
  ];
}

/** Answers a `POST /v1/messages` body with a canned "PONG", streamed or not. */
function respondToMockAnthropicMessage(res: http.ServerResponse, raw: string): void {
  let payload: { model?: unknown; stream?: unknown } = {};
  try {
    const parsed: unknown = JSON.parse(raw || "{}");
    if (typeof parsed === "object" && parsed !== null) {
      payload = parsed as { model?: unknown; stream?: unknown };
    }
  } catch {
    // A throw inside an event handler would crash the whole test process;
    // report the bad request to the client instead.
    jsonResponse(res, 400, { error: "invalid JSON body" });
    return;
  }
  const model = typeof payload.model === "string" ? payload.model : MOCK_ANTHROPIC_MODEL;
  if (payload.stream === true) {
    sseResponse(res, mockAnthropicStreamEvents(model));
    return;
  }
  jsonResponse(res, 200, {
    id: "msg_mock",
    type: "message",
    role: "assistant",
    model,
    content: [{ type: "text", text: "PONG" }],
    stop_reason: "end_turn",
    usage: { input_tokens: 1, output_tokens: 1 },
  });
}

/** Routes one request of the mock Anthropic endpoint. */
function handleMockAnthropicRequest(req: http.IncomingMessage, res: http.ServerResponse): void {
  const url = new URL(req.url ?? "/", "http://mock.local");
  if (req.method === "GET" && url.pathname === "/health") {
    jsonResponse(res, 200, { ok: true });
    return;
  }
  if (
    req.method === "GET" &&
    ["/v1/models", `/v1/models/${MOCK_ANTHROPIC_MODEL}`].includes(url.pathname)
  ) {
    jsonResponse(res, 200, { data: [{ id: MOCK_ANTHROPIC_MODEL }] });
    return;
  }
  if (req.method !== "POST" || url.pathname !== "/v1/messages") {
    jsonResponse(res, 404, { error: "not found", path: url.pathname });
    return;
  }
  let raw = "";
  req.setEncoding("utf8");
  req.on("data", (chunk: string) => {
    raw += chunk;
  });
  req.on("end", () => respondToMockAnthropicMessage(res, raw));
}

/**
 * Starts the mock Anthropic endpoint on `NEMOCLAW_SWITCH_MOCK_PORT` (0 lets
 * the OS pick a port).
 *
 * @returns The endpoint URL and a `close()` handle.
 * @throws Error when the configured port is not a valid TCP port, when
 *   listening fails, or when no TCP address is exposed.
 */
async function startMockAnthropicProvider(): Promise<MockAnthropicProvider> {
  if (!Number.isInteger(SWITCH_MOCK_PORT) || SWITCH_MOCK_PORT < 0 || SWITCH_MOCK_PORT > 65_535) {
    throw new Error(
      `NEMOCLAW_SWITCH_MOCK_PORT must be an integer between 0 and 65535 (got ${JSON.stringify(
        process.env.NEMOCLAW_SWITCH_MOCK_PORT,
      )})`,
    );
  }
  const server = http.createServer(handleMockAnthropicRequest);
  await new Promise<void>((resolve, reject) => {
    server.once("error", reject);
    // Bound on all interfaces; the advertised URL uses host.openshell.internal.
    // NOTE(review): presumably that name resolves to this host from inside the sandbox — confirm.
    server.listen(SWITCH_MOCK_PORT, "0.0.0.0", () => {
      server.off("error", reject);
      resolve();
    });
  });
  const address = server.address();
  if (!address || typeof address === "string") {
    await closeServer(server);
    throw new Error("mock Anthropic provider did not expose a TCP port");
  }
  return {
    endpointUrl: `http://host.openshell.internal:${(address as AddressInfo).port}`,
    close: () => closeServer(server),
  };
}

/**
 * Registers (or updates) the `compatible-anthropic-endpoint` provider on the
 * `nemoclaw` gateway.
 *
 * Does nothing unless the scenario switches to that provider with the
 * `anthropic-messages` API. The endpoint is `NEMOCLAW_SWITCH_ENDPOINT_URL`,
 * falling back to the in-process mock when `NEMOCLAW_SWITCH_MOCK_ANTHROPIC`
 * is `"1"`. An explicitly empty endpoint or key fails the assertions below.
 *
 * @param host - Host command client used to run the registration script.
 * @param cleanup - Registry that receives the mock's shutdown hook.
 */
export async function ensureCompatibleAnthropicSwitchProvider(
  host: HostCliClient,
  cleanup: { add(name: string, run: () => Promise<void> | void): void },
): Promise<void> {
  if (SWITCH_PROVIDER !== "compatible-anthropic-endpoint" || SWITCH_API !== "anthropic-messages") {
    return;
  }
  const mock = SWITCH_MOCK_ANTHROPIC === "1" ? await startMockAnthropicProvider() : undefined;
  if (mock) {
    // Register the shutdown hook first so a failing assertion cannot leak the server.
    cleanup.add("close compatible Anthropic switch mock", () => mock.close());
  }
  const endpointUrl = process.env.NEMOCLAW_SWITCH_ENDPOINT_URL ?? mock?.endpointUrl ?? "";
  const compatibleKey = process.env.COMPATIBLE_ANTHROPIC_API_KEY ?? "test-compatible-anthropic-key";
  expect(
    endpointUrl,
    "NEMOCLAW_SWITCH_ENDPOINT_URL is required for compatible Anthropic inference switches",
  ).not.toBe("");
  expect(
    compatibleKey,
    "COMPATIBLE_ANTHROPIC_API_KEY is required for compatible Anthropic inference switches",
  ).not.toBe("");
  // The key and URL reach the script through the environment, never by string
  // interpolation into the script text.
  const providerScript = [
    "set -euo pipefail",
    "if openshell provider get -g nemoclaw compatible-anthropic-endpoint >/dev/null 2>&1; then",
    '  openshell provider update -g nemoclaw compatible-anthropic-endpoint --credential COMPATIBLE_ANTHROPIC_API_KEY --config "ANTHROPIC_BASE_URL=${SWITCH_ENDPOINT_URL}"',
    "else",
    '  openshell provider create -g nemoclaw --name compatible-anthropic-endpoint --type anthropic --credential COMPATIBLE_ANTHROPIC_API_KEY --config "ANTHROPIC_BASE_URL=${SWITCH_ENDPOINT_URL}"',
    "fi",
  ].join("\n");
  const result = await host.command("bash", ["-lc", providerScript], {
    artifactName: "register-compatible-anthropic-switch-provider",
    env: env(undefined, {
      COMPATIBLE_ANTHROPIC_API_KEY: compatibleKey,
      SWITCH_ENDPOINT_URL: endpointUrl,
    }),
    redactionValues: [compatibleKey],
    timeoutMs: 120_000,
  });
  expect(result.exitCode).toBe(0);
}

/**
 * Runs `install.sh` non-interactively for the Hermes agent.
 *
 * On CI the install is attempted up to three times, but only a failure
 * classified by `isTransientProviderValidationFailure` is retried, after a
 * linear back-off of 10 s x attempt number. Any other failure is returned
 * immediately for the caller to assert on.
 *
 * NOTE(review): each attempt may run for up to 25 minutes, so three full-length
 * attempts would exceed the scenario's 45-minute test timeout — confirm that
 * transient failures always surface early.
 *
 * @param host - Host command client.
 * @param apiKey - NVIDIA inference API key; redacted from artifacts.
 * @returns The result of the last attempt, successful or not.
 */
export async function installHermes(
  host: HostCliClient,
  apiKey: string,
): Promise<ShellProbeResult> {
  let install: ShellProbeResult | undefined;
  for (let attempt = 1; attempt <= INSTALL_ATTEMPTS; attempt += 1) {
    install = await host.command(
      "bash",
      ["install.sh", "--non-interactive", "--yes-i-accept-third-party-software"],
      {
        artifactName: attempt === 1 ? "install-hermes" : `install-hermes-attempt-${attempt}`,
        cwd: REPO_ROOT,
        env: env(apiKey),
        redactionValues: [apiKey],
        timeoutMs: 25 * 60_000,
      },
    );
    if (install.exitCode === 0) return install;
    const retryable =
      attempt < INSTALL_ATTEMPTS && isTransientProviderValidationFailure(install);
    if (!retryable) return install;
    await new Promise<void>((resolve) => setTimeout(resolve, 10_000 * attempt));
  }
  if (!install) throw new Error("install command did not run");
  return install;
}

/**
 * Prints the PID of the first process in the sandbox whose `ps` line contains
 * both "hermes" and "gateway run". Stdout is empty when nothing matches.
 */
export async function hermesGatewayPid(
  sandbox: SandboxClient,
  artifactName: string,
): Promise<ShellProbeResult> {
  const script = trustedSandboxShellScript(
    "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
  );
  return await sandbox.execShell(SANDBOX_NAME, script, {
    artifactName,
    env: env(),
    timeoutMs: 30_000,
  });
}

/** Runs `sha256sum /sandbox/.hermes/.env` inside the sandbox. */
export async function envHash(
  sandbox: SandboxClient,
  artifactName: string,
): Promise<ShellProbeResult> {
  return await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
    artifactName,
    env: env(),
    timeoutMs: 30_000,
  });
}

/**
 * Calls `assertStable(afterPid, beforePid)` only when both probes printed a
 * PID. If either is empty the comparison is skipped; the scenario's health
 * check remains the liveness guard in that case.
 */
export function maybeAssertPidStable(
  before: ShellProbeResult,
  after: ShellProbeResult,
  assertStable: (a: string, b: string) => void,
): void {
  const beforePid = before.stdout.trim();
  const afterPid = after.stdout.trim();
  if (beforePid && afterPid) {
    assertStable(afterPid, beforePid);
  }
}

/** Base URL that Hermes is expected to be configured with for {@link SWITCH_API}. */
export function expectedBaseUrl(): string {
  if (SWITCH_API === "anthropic-messages") return "https://inference.local";
  return "https://inference.local/v1";
}

/** Expected `api_mode` for {@link SWITCH_API}; `undefined` when none is expected. */
export function expectedApiMode(): string | undefined {
  switch (SWITCH_API) {
    case "anthropic-messages":
      return "anthropic_messages";
    case "openai-responses":
      return "codex_responses";
    default:
      return undefined;
  }
}

/**
 * Checks inside the sandbox that `config.yaml` has an indented `api_key:`
 * whose value starts with `sk-`. The probe exits 0 on a match and 1 otherwise,
 * and never prints the key.
 */
export async function apiKeyShape(sandbox: SandboxClient): Promise<ShellProbeResult> {
  return await sandbox.execShell(
    SANDBOX_NAME,
    trustedSandboxShellScript(
      "python3 - <<'PY'\nimport re\ntext=open('/sandbox/.hermes/config.yaml', encoding='utf-8').read()\nmatch=re.search(r'^\\s+api_key:\\s*[\\\"\\']?(sk-[^\\\"\\'\\s]+)', text, re.M)\nraise SystemExit(0 if match else 1)\nPY",
    ),
    { artifactName: "hermes-config-api-key-shape", env: env(), timeoutMs: 30_000 },
  );
}

/**
 * Verifies a checksum file with `sha256sum -c`, printing `OK` on success.
 *
 * @param sandbox - Sandbox client.
 * @param file - Path of the checksum file inside the sandbox. It is
 *   interpolated unquoted into the shell script, so pass trusted constants only.
 * @param artifact - Label used in the artifact name.
 */
export async function hashCheck(
  sandbox: SandboxClient,
  file: string,
  artifact: string,
): Promise<ShellProbeResult> {
  return await sandbox.execShell(
    SANDBOX_NAME,
    trustedSandboxShellScript(`sha256sum -c ${file} --status && echo OK`),
    { artifactName: `hermes-${artifact}-hash-check`, env: env(), timeoutMs: 30_000 },
  );
}

/** Prints "<owner uid> <octal mode>" of `/etc/nemoclaw/hermes.config-hash`. */
export async function strictHashPerms(sandbox: SandboxClient): Promise<ShellProbeResult> {
  return await sandbox.execShell(
    SANDBOX_NAME,
    trustedSandboxShellScript("stat -c '%u %a' /etc/nemoclaw/hermes.config-hash"),
    { artifactName: "hermes-strict-hash-perms", env: env(), timeoutMs: 30_000 },
  );
}

/**
 * Calls `assertStable(afterHash, beforeHash)` when the first probe produced a
 * hash. An empty "after" hash is still compared and therefore fails.
 */
export function maybeAssertEnvHashStable(
  before: ShellProbeResult,
  after: ShellProbeResult,
  assertStable: (a: string, b: string) => void,
): void {
  // Trim first so leading whitespace cannot produce an empty first field.
  const beforeHash = before.stdout.trim().split(/\s+/u)[0] ?? "";
  const afterHash = after.stdout.trim().split(/\s+/u)[0] ?? "";
  if (beforeHash) {
    assertStable(afterHash, beforeHash);
  }
}

/** Reads `~/.nemoclaw/<name>` and requires the document to be a JSON object. */
function readNemoclawJson(name: string): Record<string, unknown> {
  const file = path.join(os.homedir(), ".nemoclaw", name);
  const parsed: unknown = JSON.parse(fs.readFileSync(file, "utf8"));
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error(`${file} does not contain a JSON object`);
  }
  return parsed as Record<string, unknown>;
}

/**
 * Loads the host-side registry and onboarding session.
 *
 * @returns The parsed `sandboxes.json` and `onboard-session.json`.
 * @throws When either file is missing, is not valid JSON, or is not an object.
 */
export function registryState(): { registry: RegistrySnapshot; session: SessionSnapshot } {
  return {
    registry: readNemoclawJson("sandboxes.json"),
    session: readNemoclawJson("onboard-session.json"),
  };
}

/** Escapes single quotes so `payload` can sit inside a single-quoted shell word. */
function quotePayload(payload: string): string {
  return payload.replace(/'/gu, `'\\''`);
}

/**
 * Builds the `curl` command that posts `payload` to `inference.local`, using
 * `/v1/messages` for the Anthropic API and `/v1/chat/completions` otherwise.
 */
export function inferenceLocalCommand(payload: string): string {
  const body = quotePayload(payload);
  if (SWITCH_API === "anthropic-messages") {
    return `curl -sS --max-time 90 https://inference.local/v1/messages -H 'Content-Type: application/json' -H 'anthropic-version: 2023-06-01' -d '${body}'`;
  }
  return `curl -sS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${body}'`;
}

/**
 * Builds the command that posts `payload` to the Hermes API on localhost:8642.
 * It sources `/sandbox/.hermes/.env` when present and sends `API_SERVER_KEY`
 * as the bearer token.
 */
export function hermesApiCommand(payload: string): string {
  return `set -a; [ ! -f /sandbox/.hermes/.env ] || . /sandbox/.hermes/.env; set +a; curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer \${API_SERVER_KEY:-}" -d '${quotePayload(payload)}'`;
}
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

/** Live Vitest replacement for test/e2e/test-hermes-inference-switch.sh. */

import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
import { resultText } from "../fixtures/clients/index.ts";
import { trustedSandboxShellScript } from "../fixtures/clients/sandbox.ts";
import { expect, test } from "../fixtures/e2e-test.ts";
import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
import {
  apiKeyShape,
  CLI,
  chatContent,
  cleanupHermesSwitch,
  ensureCompatibleAnthropicSwitchProvider,
  env,
  envHash,
  expectedApiMode,
  expectedBaseUrl,
  hashCheck,
  hermesApiCommand,
  hermesGatewayPid,
  inferenceLocalCommand,
  installHermes,
  maybeAssertEnvHashStable,
  maybeAssertPidStable,
  parseHermesModelBlock,
  registryState,
  SANDBOX_NAME,
  SWITCH_API,
  SWITCH_MODEL,
  SWITCH_PROVIDER,
  strictHashPerms,
} from "./hermes-inference-switch-helpers.ts";

// Covers install, switch, and both chat probes in a single test.
const TIMEOUT_MS = 45 * 60_000;

/**
 * Installs a Hermes sandbox, switches its inference provider and model with
 * `nemoclaw inference set`, and verifies three things: the gateway route,
 * Hermes config, config hashes, and host registry reflect the switch; the
 * running gateway process and `.env` are left untouched; and chat works both
 * through inference.local and the Hermes API.
 */
test.skipIf(!shouldRunLiveE2EScenarios())(
  "Hermes inference set updates route/config and preserves live runtime",
  { timeout: TIMEOUT_MS },
  async ({ artifacts, cleanup, host, sandbox, secrets }) => {
    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
    await artifacts.writeJson("scenario.json", {
      id: "hermes-inference-switch",
      legacySource: "test/e2e/test-hermes-inference-switch.sh",
      boundary: "install.sh + Hermes sandbox + inference set + in-sandbox health/chat probes",
      sandboxName: SANDBOX_NAME,
      switchProvider: SWITCH_PROVIDER,
      switchModel: SWITCH_MODEL,
      switchApi: SWITCH_API,
    });

    // Register teardown before creating anything, then clear leftovers from earlier runs.
    cleanup.add("destroy Hermes inference switch sandbox", () =>
      cleanupHermesSwitch(host, sandbox),
    );
    await cleanupHermesSwitch(host, sandbox);

    const docker = await host.command("docker", ["info"], {
      artifactName: "docker-info",
      env: buildAvailabilityProbeEnv(),
      timeoutMs: 30_000,
    });
    expect(docker.exitCode, resultText(docker)).toBe(0);

    const install = await installHermes(host, apiKey);
    expect(install.exitCode, resultText(install)).toBe(0);
    await ensureCompatibleAnthropicSwitchProvider(host, cleanup);

    // Snapshot the runtime so the switch can be shown not to restart or rewrite it.
    const pidBefore = await hermesGatewayPid(sandbox, "pid-before");
    const envHashBefore = await envHash(sandbox, "env-hash-before");

    const switched = await host.command(
      "node",
      [CLI, "inference", "set", "--provider", SWITCH_PROVIDER, "--model", SWITCH_MODEL],
      {
        artifactName: "hermes-inference-set",
        env: env(apiKey),
        redactionValues: [apiKey],
        timeoutMs: 180_000,
      },
    );
    expect(switched.exitCode, resultText(switched)).toBe(0);

    const pidAfter = await hermesGatewayPid(sandbox, "pid-after");
    maybeAssertPidStable(pidBefore, pidAfter, (actual, expected) => expect(actual).toBe(expected));

    const health = await sandbox.exec(
      SANDBOX_NAME,
      ["curl", "-sf", "--max-time", "10", "http://localhost:8642/health"],
      { artifactName: "hermes-health-after-switch", env: env(), timeoutMs: 30_000 },
    );
    expect(health.exitCode, resultText(health)).toBe(0);
    expect(resultText(health)).toMatch(/ok/i);

    // The gateway route must name the new provider and model.
    const route = await sandbox.openshell(["inference", "get", "-g", "nemoclaw"], {
      artifactName: "openshell-inference-route",
      env: env(),
      timeoutMs: 30_000,
    });
    expect(route.exitCode, resultText(route)).toBe(0);
    expect(resultText(route)).toContain(SWITCH_PROVIDER);
    expect(resultText(route)).toContain(SWITCH_MODEL);

    // The Hermes config must point at inference.local with the new model.
    const config = await sandbox.exec(SANDBOX_NAME, ["cat", "/sandbox/.hermes/config.yaml"], {
      artifactName: "hermes-config-yaml",
      env: env(),
      redactionValues: [apiKey],
      timeoutMs: 30_000,
    });
    expect(config.exitCode, resultText(config)).toBe(0);
    const model = parseHermesModelBlock(config.stdout);
    expect(model.default).toBe(SWITCH_MODEL);
    expect(model.provider).toBe("custom");
    expect(model.base_url).toBe(expectedBaseUrl());
    expect(model.api_mode).toBe(expectedApiMode());
    const keyShape = await apiKeyShape(sandbox);
    expect(keyShape.exitCode, resultText(keyShape)).toBe(0);
    expect(config.stdout).not.toMatch(/^models:\s*$/mu);

    // Both recorded config hashes must verify against the rewritten config.
    const strictHash = await hashCheck(sandbox, "/etc/nemoclaw/hermes.config-hash", "strict");
    expect(strictHash.exitCode, resultText(strictHash)).toBe(0);
    expect(strictHash.stdout).toContain("OK");
    const compatHash = await hashCheck(sandbox, "/sandbox/.hermes/.config-hash", "compat");
    expect(compatHash.exitCode, resultText(compatHash)).toBe(0);
    expect(compatHash.stdout).toContain("OK");

    // The strict hash must be owned by uid 0 and carry no write bits.
    const strictPerms = await strictHashPerms(sandbox);
    expect(strictPerms.exitCode, resultText(strictPerms)).toBe(0);
    const permsLine = strictPerms.stdout.trim();
    expect(permsLine).toMatch(/^0\s+[0-7]+$/u);
    const [, mode = ""] = permsLine.split(/\s+/u);
    expect(Number.parseInt(mode, 8) & 0o222).toBe(0);

    maybeAssertEnvHashStable(
      envHashBefore,
      await envHash(sandbox, "env-hash-after"),
      (actual, expected) => expect(actual).toBe(expected),
    );

    // The host registry and onboarding session must record the new provider and model.
    const state = registryState();
    const registered = state.registry.sandboxes?.[SANDBOX_NAME];
    expect(registered?.agent).toBe("hermes");
    expect(registered?.provider).toBe(SWITCH_PROVIDER);
    expect(registered?.model).toBe(SWITCH_MODEL);
    expect(state.session.sandboxName).toBe(SANDBOX_NAME);
    expect(state.session.agent).toBe("hermes");
    expect(state.session.provider).toBe(SWITCH_PROVIDER);
    expect(state.session.model).toBe(SWITCH_MODEL);

    // The same payload is sent through the gateway route and the Hermes API.
    const inferenceLocalPayload = JSON.stringify({
      model: SWITCH_MODEL,
      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
      max_tokens: 100,
    });
    const inferenceLocal = await sandbox.execShell(
      SANDBOX_NAME,
      trustedSandboxShellScript(inferenceLocalCommand(inferenceLocalPayload)),
      {
        artifactName: "hermes-inference-local-chat-after-switch",
        env: env(),
        redactionValues: [apiKey],
        timeoutMs: 120_000,
      },
    );
    expect(inferenceLocal.exitCode, resultText(inferenceLocal)).toBe(0);
    expect(chatContent(inferenceLocal.stdout)).toMatch(/PONG/i);

    const chat = await sandbox.execShell(
      SANDBOX_NAME,
      trustedSandboxShellScript(hermesApiCommand(inferenceLocalPayload)),
      {
        artifactName: "hermes-api-chat-after-switch",
        env: env(),
        redactionValues: [apiKey],
        timeoutMs: 150_000,
      },
    );
    expect(chat.exitCode, resultText(chat)).toBe(0);
    expect(chatContent(chat.stdout)).toMatch(/PONG/i);
  },
);