-
Notifications
You must be signed in to change notification settings - Fork 2.9k
test(e2e): migrate Hermes inference switch to vitest #5553
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+623
−0
Merged
Changes from 2 commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
bea95ca
test(e2e): migrate Hermes inference switch to vitest
cv d965382
test(e2e): restore Hermes switch assertions
cv ed011cd
test(e2e): cover Hermes switch API modes
cv 3f43d8d
test(e2e): avoid Hermes api key redaction false negative
cv 6b6d1a9
Merge remote-tracking branch 'origin/main' into e2e-migrate/test-hermes-inference-switch
cv 9bb6ab7
Merge remote-tracking branch 'origin/main' into e2e-migrate/test-hermes-inference-switch
cv fcedfdb
test(e2e): relax workflow inventory timeout
cv 8ef4371
Merge branch 'main' into e2e-migrate/test-hermes-inference-switch
cv 969a853
test(e2e): move scenario logic out of test wrapper
cv df7f8d6
Merge remote-tracking branch 'origin/e2e-migrate/test-hermes-inferenc…
cv c57bee3
test(e2e): restore scenario logic to test file
cv 32ce1db
test(e2e): move Hermes switch branches to helpers
cv 09b04df
Apply suggestions from code review
cv c4e8b2d
Merge remote-tracking branch 'origin/main' into e2e-migrate/test-hermes-inference-switch
cv 6258816
test(e2e): add compatible Anthropic Hermes switch setup
cv 15c05a3
Merge branch 'main' into e2e-migrate/test-hermes-inference-switch
cv File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,311 @@ | ||
| // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| /** Live Vitest replacement for test/e2e/test-hermes-inference-switch.sh. */ | ||
|
|
||
| import fs from "node:fs"; | ||
| import os from "node:os"; | ||
| import path from "node:path"; | ||
|
|
||
| import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts"; | ||
| import { resultText } from "../fixtures/clients/index.ts"; | ||
| import { trustedSandboxShellScript, validateSandboxName } from "../fixtures/clients/sandbox.ts"; | ||
| import { expect, test } from "../fixtures/e2e-test.ts"; | ||
| import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts"; | ||
| import type { ShellProbeResult } from "../fixtures/shell-probe.ts"; | ||
| import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts"; | ||
|
|
||
/**
 * Reads a string override from the process environment.
 *
 * An empty string is treated the same as "unset": CI systems commonly export
 * optional inputs as empty variables, and `??` alone would pass that empty
 * value through instead of the default.
 *
 * @param name - Environment variable to read.
 * @param fallback - Value used when the variable is undefined or empty.
 * @returns The override, or `fallback` when no usable override exists.
 */
function envOrDefault(name: string, fallback: string): string {
  const value = process.env[name];
  return value === undefined || value === "" ? fallback : value;
}

// Repository root, resolved three directories above this file's directory.
const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
// CLI entry script; the scenario runs it through `node`.
const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js");
// Sandbox under test. Validated at module load so a bad override fails before any command runs.
const SANDBOX_NAME = envOrDefault("NEMOCLAW_SANDBOX_NAME", "e2e-hermes-inference-switch");
validateSandboxName(SANDBOX_NAME);
// Target of the `inference set` switch, plus the API mode whose config shape is asserted.
const SWITCH_PROVIDER = envOrDefault("NEMOCLAW_SWITCH_PROVIDER", "nvidia-prod");
const SWITCH_MODEL = envOrDefault("NEMOCLAW_SWITCH_MODEL", "z-ai/glm-5.1");
const SWITCH_API = envOrDefault("NEMOCLAW_SWITCH_INFERENCE_API", "openai-completions");
// Up to three install attempts when CI or GITHUB_ACTIONS is "true"; a single attempt otherwise.
const INSTALL_ATTEMPTS = process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true" ? 3 : 1;
// Overall test timeout: 45 minutes.
const TIMEOUT_MS = 45 * 60_000;
|
|
||
/**
 * Builds the environment handed to every host, CLI and sandbox command in this scenario.
 *
 * The result starts from `buildAvailabilityProbeEnv()` and then pins the
 * scenario-specific NEMOCLAW_* switches and the gateway name, so those always
 * win over whatever the base environment contains.
 *
 * @param apiKey - When given (and non-empty), exported as both
 *   `NVIDIA_INFERENCE_API_KEY` and `NVIDIA_API_KEY`. Omit it for commands that
 *   should not receive the credential.
 * @returns A new environment object on every call.
 */
function env(apiKey?: string): NodeJS.ProcessEnv {
  // Both variable names carry the same key; they are only added when a key was supplied.
  const credentials: NodeJS.ProcessEnv = apiKey
    ? { NVIDIA_INFERENCE_API_KEY: apiKey, NVIDIA_API_KEY: apiKey }
    : {};
  return {
    ...buildAvailabilityProbeEnv(),
    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
    NEMOCLAW_AGENT: "hermes",
    NEMOCLAW_NON_INTERACTIVE: "1",
    NEMOCLAW_RECREATE_SANDBOX: "1",
    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
    // `||` on purpose: an exported-but-empty OPENSHELL_GATEWAY must fall back
    // to the default gateway rather than being forwarded as "".
    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY || "nemoclaw",
    ...credentials,
  };
}
|
|
||
/**
 * Runs `run` and waits for it to settle, discarding any failure.
 *
 * Used for cleanup steps that are allowed to fail (for example deleting a
 * sandbox that does not exist). Both synchronous throws from `run` and
 * rejections of the promise it returns are swallowed.
 *
 * @param run - Operation to attempt; its result is ignored.
 * @returns A promise that always resolves with no value.
 */
async function bestEffort(run: () => Promise<unknown>): Promise<void> {
  // Wrapping the call in an async function turns a synchronous throw into a
  // rejection, so a single `.catch` covers both failure modes.
  const attempt = async (): Promise<void> => {
    await run();
  };
  await attempt().catch(() => undefined);
}
|
|
||
/**
 * Extracts the `key: value` entries found under the top-level `model:` key of
 * a Hermes `config.yaml`.
 *
 * This is a small line scanner, not a YAML parser:
 * - the block starts at a column-0 `model:` line (a trailing `# comment` is allowed);
 * - it ends at the first later line that has content in column 0, other than a comment;
 * - every indented `key: value` line in between is recorded, whatever its
 *   indentation depth, so nested mappings are flattened and later duplicates win;
 * - a value wrapped in a matching pair of single or double quotes is unwrapped;
 * - for unquoted values, a trailing ` # comment` is removed.
 *
 * @param text - Full contents of the config file (LF or CRLF line endings).
 * @returns The collected entries; an empty object when there is no `model:` block.
 */
function parseHermesModelBlock(text: string): Record<string, string> {
  const blockHeader = /^model:\s*(?:#.*)?$/u;
  const topLevelContent = /^[^\s#]/u;
  const entry = /^\s+([A-Za-z0-9_-]+):\s*(.*?)\s*$/u;
  // A scalar enclosed in one matching pair of quotes, optionally followed by a comment.
  const quotedScalar = /^(['"])(.*?)\1(?:\s+#.*)?$/u;
  // In YAML a `#` starts a comment only at the start of the value or after whitespace.
  const trailingComment = /(?:^|\s+)#.*$/u;

  const model: Record<string, string> = {};
  let inModel = false;
  for (const line of text.split(/\r?\n/u)) {
    if (blockHeader.test(line)) {
      inModel = true;
      continue;
    }
    if (!inModel) continue;
    // Anything back at column 0 belongs to the next top-level section.
    if (topLevelContent.test(line)) break;

    const match = entry.exec(line);
    if (!match) continue;
    const key = match[1];
    const raw = match[2];
    if (key === undefined || raw === undefined) continue;

    const quoted = quotedScalar.exec(raw);
    // Unbalanced quotes are left untouched rather than half-stripped, so a
    // malformed value stays visible to the assertions that read it.
    model[key] = quoted ? (quoted[2] ?? "") : raw.replace(trailingComment, "");
  }
  return model;
}
|
|
||
/**
 * Pulls the assistant text out of a chat-completions style JSON response.
 *
 * Looks at `choices[0].message` and returns the first of `content`,
 * `reasoning_content`, `reasoning` that is a non-blank string, trimmed.
 *
 * @param raw - Response body as text.
 * @returns The trimmed text, or `""` when the body is valid JSON but has no
 *   usable message text (for example an error envelope or a JSON `null`).
 * @throws SyntaxError when `raw` is not valid JSON.
 */
function chatContent(raw: string): string {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  // Narrow from `unknown` instead of casting: the body comes from a live
  // endpoint and may be any JSON value.
  const parsed: unknown = JSON.parse(raw);
  if (!isRecord(parsed) || !Array.isArray(parsed.choices)) return "";

  const first: unknown = parsed.choices[0];
  const message = isRecord(first) && isRecord(first.message) ? first.message : {};
  for (const key of ["content", "reasoning_content", "reasoning"]) {
    const value = message[key];
    if (typeof value === "string" && value.trim()) return value.trim();
  }
  return "";
}
|
|
||
/*
 * Live scenario: install a Hermes sandbox with install.sh, switch its inference
 * provider/model with `nemoclaw inference set`, then verify from the host and
 * from inside the sandbox that
 *   - the Hermes gateway process was not replaced and still reports healthy,
 *   - the OpenShell route and /sandbox/.hermes/config.yaml reflect the switch,
 *   - both config-hash files still verify and the strict one is root-owned and read-only,
 *   - /sandbox/.hermes/.env is unchanged,
 *   - the host registry and onboard session record the new provider/model,
 *   - chat completions work through inference.local and through the Hermes API.
 * The whole test is skipped when shouldRunLiveE2EScenarios() is falsy.
 */
test.skipIf(!shouldRunLiveE2EScenarios())(
  "Hermes inference set updates route/config and preserves live runtime",
  { timeout: TIMEOUT_MS },
  async ({ artifacts, cleanup, host, sandbox, secrets }) => {
    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
    // Record the scenario parameters alongside the other artifacts.
    await artifacts.writeJson("scenario.json", {
      id: "hermes-inference-switch",
      legacySource: "test/e2e/test-hermes-inference-switch.sh",
      boundary: "install.sh + Hermes sandbox + inference set + in-sandbox health/chat probes",
      sandboxName: SANDBOX_NAME,
      switchProvider: SWITCH_PROVIDER,
      switchModel: SWITCH_MODEL,
      switchApi: SWITCH_API,
    });

    // Register teardown before anything is created. Both steps are best-effort:
    // first the CLI destroy, then a direct OpenShell delete.
    cleanup.add("destroy Hermes inference switch sandbox", async () => {
      await bestEffort(() =>
        host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
          artifactName: "cleanup-nemoclaw-destroy",
          env: env(),
          timeoutMs: 120_000,
        }),
      );
      await bestEffort(() =>
        sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
          artifactName: "cleanup-openshell-delete",
          env: env(),
          timeoutMs: 60_000,
        }),
      );
    });

    // Same two commands run up front, so a sandbox left behind under this name
    // does not interfere with the install below.
    await bestEffort(() =>
      host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
        artifactName: "pre-cleanup-destroy",
        env: env(),
        timeoutMs: 120_000,
      }),
    );
    await bestEffort(() =>
      sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
        artifactName: "pre-cleanup-delete",
        env: env(),
        timeoutMs: 60_000,
      }),
    );

    // Precondition: `docker info` must succeed on the host.
    const docker = await host.command("docker", ["info"], {
      artifactName: "docker-info",
      env: buildAvailabilityProbeEnv(),
      timeoutMs: 30_000,
    });
    expect(docker.exitCode, resultText(docker)).toBe(0);

    // Install, retrying only when the failure is classified as a transient
    // provider-validation failure. The wait grows linearly: 10s, 20s, ...
    let install: ShellProbeResult | undefined;
    for (let attempt = 1; attempt <= INSTALL_ATTEMPTS; attempt += 1) {
      install = await host.command(
        "bash",
        ["install.sh", "--non-interactive", "--yes-i-accept-third-party-software"],
        {
          // The first attempt keeps the plain artifact name; retries get a numbered suffix.
          artifactName: attempt === 1 ? "install-hermes" : `install-hermes-attempt-${attempt}`,
          cwd: REPO_ROOT,
          env: env(apiKey),
          redactionValues: [apiKey],
          timeoutMs: 25 * 60_000,
        },
      );
      if (install.exitCode === 0) break;
      if (isTransientProviderValidationFailure(install) && attempt < INSTALL_ATTEMPTS) {
        await new Promise((resolve) => setTimeout(resolve, 10_000 * attempt));
        continue;
      }
      // Non-transient failure, or attempts exhausted: fall through to the assertions.
      break;
    }
    expect(install, "install command must run").toBeDefined();
    expect(install?.exitCode, resultText(install as ShellProbeResult)).toBe(0);

    // Baseline before the switch: PID of the first process whose `ps` line
    // matches both "hermes" and "gateway run", and the hash of the .env file.
    const pidBefore = await sandbox.execShell(
      SANDBOX_NAME,
      trustedSandboxShellScript(
        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
      ),
      { artifactName: "pid-before", env: env(), timeoutMs: 30_000 },
    );
    const envHashBefore = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
      artifactName: "env-hash-before",
      env: env(),
      timeoutMs: 30_000,
    });

    // The operation under test.
    // NOTE(review): only --provider and --model are passed here; SWITCH_API is
    // used solely to choose the expected config shape further down. Confirm the
    // API mode reaches the CLI some other way when a non-default mode is requested.
    const switched = await host.command(
      "node",
      [CLI, "inference", "set", "--provider", SWITCH_PROVIDER, "--model", SWITCH_MODEL],
      {
        artifactName: "hermes-inference-set",
        env: env(apiKey),
        redactionValues: [apiKey],
        timeoutMs: 180_000,
      },
    );
    expect(switched.exitCode, resultText(switched)).toBe(0);

    const pidAfter = await sandbox.execShell(
      SANDBOX_NAME,
      trustedSandboxShellScript(
        "ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
      ),
      { artifactName: "pid-after", env: env(), timeoutMs: 30_000 },
    );
    // The PID is compared only when both probes found one; if either probe is
    // empty this check is silently skipped.
    if (pidBefore.stdout.trim() && pidAfter.stdout.trim())
      expect(pidAfter.stdout.trim()).toBe(pidBefore.stdout.trim());

    // The gateway health endpoint must still answer inside the sandbox.
    const health = await sandbox.exec(
      SANDBOX_NAME,
      ["curl", "-sf", "--max-time", "10", "http://localhost:8642/health"],
      { artifactName: "hermes-health-after-switch", env: env(), timeoutMs: 30_000 },
    );
    expect(health.exitCode, resultText(health)).toBe(0);
    expect(resultText(health)).toMatch(/ok/i);

    // The OpenShell route must mention the new provider and model.
    // NOTE(review): the gateway name is hard-coded to "nemoclaw" here, while
    // env() lets OPENSHELL_GATEWAY override it. Confirm these cannot diverge.
    const route = await sandbox.openshell(["inference", "get", "-g", "nemoclaw"], {
      artifactName: "openshell-inference-route",
      env: env(),
      timeoutMs: 30_000,
    });
    expect(route.exitCode, resultText(route)).toBe(0);
    expect(resultText(route)).toContain(SWITCH_PROVIDER);
    expect(resultText(route)).toContain(SWITCH_MODEL);

    // The Hermes config inside the sandbox must reflect the switch.
    const config = await sandbox.exec(SANDBOX_NAME, ["cat", "/sandbox/.hermes/config.yaml"], {
      artifactName: "hermes-config-yaml",
      env: env(),
      redactionValues: [apiKey],
      timeoutMs: 30_000,
    });
    expect(config.exitCode, resultText(config)).toBe(0);
    const model = parseHermesModelBlock(config.stdout);
    expect(model.default).toBe(SWITCH_MODEL);
    expect(model.provider).toBe("custom");
    // Anthropic mode expects the bare host; every other mode expects the /v1 prefix.
    expect(model.base_url).toBe(
      SWITCH_API === "anthropic-messages"
        ? "https://inference.local"
        : "https://inference.local/v1",
    );
    // api_mode is expected only for the two non-default modes and must be absent otherwise.
    if (SWITCH_API === "anthropic-messages") expect(model.api_mode).toBe("anthropic_messages");
    else if (SWITCH_API === "openai-responses") expect(model.api_mode).toBe("codex_responses");
    else expect(model.api_mode).toBeUndefined();
    // NOTE(review): this reads api_key from the same result that was captured
    // with redactionValues. If redaction also rewrites `stdout`, a key equal to
    // the host API key would no longer start with "sk-". Confirm.
    expect(model.api_key).toMatch(/^sk-/u);
    // There must be no top-level `models:` key in the file.
    expect(config.stdout).not.toMatch(/^models:\s*$/mu);

    // Both recorded config hashes must still verify. `--status` makes
    // sha256sum report through its exit code only, so "OK" is echoed on success.
    for (const [file, artifact] of [
      ["/etc/nemoclaw/hermes.config-hash", "strict"],
      ["/sandbox/.hermes/.config-hash", "compat"],
    ] as const) {
      const hash = await sandbox.execShell(
        SANDBOX_NAME,
        trustedSandboxShellScript(`sha256sum -c ${file} --status && echo OK`),
        { artifactName: `hermes-${artifact}-hash-check`, env: env(), timeoutMs: 30_000 },
      );
      expect(hash.exitCode, resultText(hash)).toBe(0);
      expect(hash.stdout).toContain("OK");
    }
    // The strict hash file must be owned by uid 0 and carry no write bits
    // (octal mode & 0o222 === 0).
    const strictHashPerms = await sandbox.execShell(
      SANDBOX_NAME,
      trustedSandboxShellScript("stat -c '%u %a' /etc/nemoclaw/hermes.config-hash"),
      { artifactName: "hermes-strict-hash-perms", env: env(), timeoutMs: 30_000 },
    );
    expect(strictHashPerms.stdout.trim()).toMatch(/^0\s+[0-7]+$/u);
    expect(Number.parseInt(strictHashPerms.stdout.trim().split(/\s+/u)[1], 8) & 0o222).toBe(0);

    // The .env file must be byte-identical after the switch. Only the digest
    // column is compared, and only when the baseline probe produced output.
    const envHashAfter = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
      artifactName: "env-hash-after",
      env: env(),
      timeoutMs: 30_000,
    });
    if (envHashBefore.stdout.trim())
      expect(envHashAfter.stdout.split(/\s+/u)[0]).toBe(envHashBefore.stdout.split(/\s+/u)[0]);

    // Host-side state under ~/.nemoclaw must record the switched provider and model.
    const registry = JSON.parse(
      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "sandboxes.json"), "utf8"),
    );
    expect(registry.sandboxes?.[SANDBOX_NAME]?.agent).toBe("hermes");
    expect(registry.sandboxes?.[SANDBOX_NAME]?.provider).toBe(SWITCH_PROVIDER);
    expect(registry.sandboxes?.[SANDBOX_NAME]?.model).toBe(SWITCH_MODEL);
    const session = JSON.parse(
      fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "onboard-session.json"), "utf8"),
    );
    expect(session.sandboxName).toBe(SANDBOX_NAME);
    expect(session.agent).toBe("hermes");
    expect(session.provider).toBe(SWITCH_PROVIDER);
    expect(session.model).toBe(SWITCH_MODEL);

    // Probe 1: chat completion sent straight to inference.local from inside the sandbox.
    const inferenceLocalPayload = JSON.stringify({
      model: SWITCH_MODEL,
      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
      max_tokens: 100,
    });
    const inferenceLocal = await sandbox.execShell(
      SANDBOX_NAME,
      trustedSandboxShellScript(
        // The JSON body sits inside single quotes, so any single quote in it is
        // rewritten to '\'' (close the quote, add an escaped quote, reopen).
        `curl -sS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${inferenceLocalPayload.replace(/'/gu, `'\\''`)}'`,
      ),
      {
        artifactName: "hermes-inference-local-chat-after-switch",
        env: env(),
        redactionValues: [apiKey],
        timeoutMs: 120_000,
      },
    );
    expect(inferenceLocal.exitCode, resultText(inferenceLocal)).toBe(0);
    expect(chatContent(inferenceLocal.stdout)).toMatch(/PONG/i);

    // Probe 2: the same request through the Hermes API server on localhost:8642.
    const payload = JSON.stringify({
      model: SWITCH_MODEL,
      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
      max_tokens: 100,
    });
    const chat = await sandbox.execShell(
      SANDBOX_NAME,
      trustedSandboxShellScript(
        // Sources /sandbox/.hermes/.env when it exists (with auto-export on) so
        // API_SERVER_KEY is available; the bearer token is empty when it is unset.
        `set -a; [ ! -f /sandbox/.hermes/.env ] || . /sandbox/.hermes/.env; set +a; curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer \${API_SERVER_KEY:-}" -d '${payload.replace(/'/gu, `'\\''`)}'`,
      ),
      {
        artifactName: "hermes-api-chat-after-switch",
        env: env(),
        redactionValues: [apiKey],
        timeoutMs: 150_000,
      },
    );
    expect(chat.exitCode, resultText(chat)).toBe(0);
    expect(chatContent(chat.stdout)).toMatch(/PONG/i);
  },
);
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.