Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions .github/workflows/e2e-vitest-scenarios.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,55 @@ jobs:
if-no-files-found: ignore
retention-days: 14

# Free-standing live Vitest job for the "hermes-inference-switch" scenario.
# It runs when no job/scenario filter input is supplied, or when the comma-separated
# `jobs` input names this job, or the `scenarios` input names the scenario.
hermes-inference-switch-vitest:
needs: generate-matrix
if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',hermes-inference-switch-vitest,') || contains(format(',{0},', inputs.scenarios), ',hermes-inference-switch,') }}
runs-on: ubuntu-latest
# The test file sets its own 45-minute timeout; the job limit is 55 minutes.
timeout-minutes: 55
env:
FREE_STANDING_VITEST_JOB: "1"
FREE_STANDING_SCENARIO_ID: "hermes-inference-switch"
# Absolute form of the directory uploaded by the artifact step at the end of this job.
E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/hermes-inference-switch
NEMOCLAW_CLI_BIN: ${{ github.workspace }}/bin/nemoclaw.js
NEMOCLAW_RUN_E2E_SCENARIOS: "1"
NEMOCLAW_NON_INTERACTIVE: "1"
NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
NEMOCLAW_AGENT: "hermes"
# Same value the test file falls back to when NEMOCLAW_SANDBOX_NAME is unset.
NEMOCLAW_SANDBOX_NAME: "e2e-hermes-inference-switch"
OPENSHELL_GATEWAY: "nemoclaw"
steps:
# Actions are pinned by commit SHA; the trailing comment records the release tag.
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- name: Set up Node
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
with:
node-version: 22
cache: npm
# Dependencies are installed without running package lifecycle scripts.
- name: Install root dependencies
run: npm ci --ignore-scripts
- name: Build CLI
run: npm run build:cli
- name: Install OpenShell CLI
run: bash scripts/install-openshell.sh
# The API key secret is exposed to this step only. The script prepends the user-local
# bin directories to PATH and exports OPENSHELL_BIN as the resolved `openshell` path;
# `|| true` leaves it empty instead of aborting under `set -e` when the binary is missing.
- name: Run Hermes inference switch live Vitest test
env:
NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
run: |
set -euo pipefail
export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"
export OPENSHELL_BIN="$(command -v openshell || true)"
npx vitest run --project e2e-scenarios-live test/e2e-scenario/live/hermes-inference-switch.test.ts --silent=false --reporter=default
# Runs even when earlier steps fail; a missing artifact directory is not an error.
- name: Upload Hermes inference switch artifacts
if: always()
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: e2e-vitest-scenarios-hermes-inference-switch
path: e2e-artifacts/vitest/hermes-inference-switch/
include-hidden-files: false
if-no-files-found: ignore
retention-days: 14

issue-4434-tui-unreachable-inference-vitest:
needs: generate-matrix
if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',issue-4434-tui-unreachable-inference-vitest,') || contains(format(',{0},', inputs.scenarios), ',issue-4434-tui-unreachable-inference,') }}
Expand Down Expand Up @@ -3586,6 +3635,7 @@ jobs:
openclaw-skill-cli-vitest,
inference-routing-vitest,
cloud-inference-vitest,
hermes-inference-switch-vitest,
credential-sanitization-vitest,
credential-migration-vitest,
sessions-agents-cli-vitest,
Expand Down
260 changes: 260 additions & 0 deletions test/e2e-scenario/live/hermes-inference-switch.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

/** Live Vitest replacement for test/e2e/test-hermes-inference-switch.sh. */

import fs from "node:fs";
import os from "node:os";
import path from "node:path";

import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
import { resultText } from "../fixtures/clients/index.ts";
import { trustedSandboxShellScript, validateSandboxName } from "../fixtures/clients/sandbox.ts";
import { expect, test } from "../fixtures/e2e-test.ts";
import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";

/** Repository root, resolved three directory levels up from this module's directory. */
const REPO_ROOT = path.resolve(import.meta.dirname, "..", "..", "..");

/** CLI entry script that the scenario drives through `node`. */
const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js");

/** Sandbox under test; overridable through NEMOCLAW_SANDBOX_NAME. */
const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-hermes-inference-switch";
// Checked once at module load, before the name is used in any command line.
// NOTE(review): presumably throws on an unacceptable name — confirm in fixtures/clients/sandbox.ts.
validateSandboxName(SANDBOX_NAME);

/** Target of the `inference set` call; each value has an environment override. */
const SWITCH_PROVIDER = process.env.NEMOCLAW_SWITCH_PROVIDER ?? "nvidia-prod";
const SWITCH_MODEL = process.env.NEMOCLAW_SWITCH_MODEL ?? "z-ai/glm-5.1";
const SWITCH_API = process.env.NEMOCLAW_SWITCH_INFERENCE_API ?? "openai-completions";

/** install.sh gets three attempts when CI or GITHUB_ACTIONS is "true", otherwise one. */
const INSTALL_ATTEMPTS = [process.env.CI, process.env.GITHUB_ACTIONS].includes("true") ? 3 : 1;

/** Overall test timeout: 45 minutes, in milliseconds. */
const TIMEOUT_MS = 45 * 60 * 1_000;

/**
 * Builds the environment passed to every host and sandbox command in this scenario.
 *
 * The result layers the fixed Hermes / non-interactive settings on top of
 * `buildAvailabilityProbeEnv()`. OPENSHELL_GATEWAY is taken from the current
 * process environment and falls back to "nemoclaw".
 *
 * @param apiKey When non-empty, exported as both NVIDIA_INFERENCE_API_KEY and
 *   NVIDIA_API_KEY. When omitted or empty, neither variable is added.
 * @returns A fresh environment object on every call.
 */
function env(apiKey?: string): NodeJS.ProcessEnv {
  // Spread last so the credentials land after (and override) everything else.
  const credentials: NodeJS.ProcessEnv = apiKey
    ? { NVIDIA_INFERENCE_API_KEY: apiKey, NVIDIA_API_KEY: apiKey }
    : {};
  return {
    ...buildAvailabilityProbeEnv(),
    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
    NEMOCLAW_AGENT: "hermes",
    NEMOCLAW_NON_INTERACTIVE: "1",
    NEMOCLAW_RECREATE_SANDBOX: "1",
    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
    ...credentials,
  };
}

/**
 * Runs `run` and settles successfully no matter how it ends.
 *
 * Used for cleanup steps whose failure must not fail the scenario. Both a
 * rejected promise and a synchronous throw from `run` are deliberately
 * discarded, and the resolved value is dropped.
 *
 * @param run Operation to attempt; invoked exactly once, immediately.
 * @returns A promise that always resolves to `undefined`.
 */
function bestEffort(run: () => Promise<unknown>): Promise<void> {
  const ignore = (): void => undefined;
  // The async wrapper turns a synchronous throw from `run` into a rejection,
  // so both failure modes end up in the same ignored branch.
  return (async () => run())().then(ignore, ignore);
}

/**
 * Normalizes the raw text captured after `key:` on a single YAML line.
 *
 * A value wrapped in one matching pair of single or double quotes is returned
 * without the quotes, and an optional trailing ` # comment` after the closing
 * quote is dropped. For any other value only a trailing comment is removed, so
 * a quote character that is part of a plain scalar (for example `rock'`) is kept.
 * Escape sequences inside quoted scalars are not interpreted.
 */
function hermesScalarValue(raw: string): string {
  const quoted = /^(['"])(.*?)\1(?:\s+#.*)?$/u.exec(raw);
  if (quoted) return quoted[2] ?? "";
  // In YAML a `#` starts a comment only at the start of the value or after whitespace.
  return raw.replace(/(?:^|\s+)#.*$/u, "").trim();
}

/**
 * Extracts the scalar entries that sit directly under the top-level `model:` key
 * of a Hermes `config.yaml`, without a full YAML parser.
 *
 * Only `key: value` lines at the indentation of the first child are collected.
 * Deeper-nested mappings are skipped so their keys cannot overwrite a direct
 * child of the same name. A nested mapping's own key is still reported, with an
 * empty string as its value.
 *
 * @param text Full contents of the config file; LF or CRLF line endings.
 * @returns Map of direct child keys to their unquoted, comment-free values.
 *   Empty when there is no top-level `model:` key.
 */
function parseHermesModelBlock(text: string): Record<string, string> {
  const model: Record<string, string> = {};
  let inModel = false;
  let childIndent = -1; // indentation width of the first direct child; -1 until seen
  for (const line of text.split(/\r?\n/u)) {
    if (/^model:\s*(?:#.*)?$/u.test(line)) {
      inModel = true;
      continue;
    }
    if (!inModel) continue;
    // Any non-indented content other than a comment belongs to the next top-level
    // node and therefore closes the block.
    if (/^[^\s#]/u.test(line)) break;
    const match = /^(\s+)([A-Za-z0-9_-]+):\s*(.*?)\s*$/u.exec(line);
    if (!match) continue;
    const [, indent = "", key = "", raw = ""] = match;
    if (childIndent < 0) childIndent = indent.length;
    if (indent.length !== childIndent) continue;
    model[key] = hermesScalarValue(raw);
  }
  return model;
}

/**
 * Pulls the assistant's text out of a chat-completions style JSON response.
 *
 * Looks at the first choice's message and returns the first of `content`,
 * `reasoning_content`, `reasoning` (in that order) that is a string with
 * non-whitespace characters, trimmed.
 *
 * @param raw Response body as JSON text.
 * @returns The trimmed text, or "" when no such field exists.
 * @throws SyntaxError when `raw` is not valid JSON.
 */
function chatContent(raw: string): string {
  const body = JSON.parse(raw) as { choices?: Array<{ message?: Record<string, unknown> }> };
  const firstMessage = body.choices?.[0]?.message ?? {};
  const candidates = ["content", "reasoning_content", "reasoning"]
    .map((field) => firstMessage[field])
    .filter((candidate): candidate is string => typeof candidate === "string")
    .map((candidate) => candidate.trim());
  return candidates.find((candidate) => candidate.length > 0) ?? "";
}

/**
 * Live scenario: install NemoClaw with the Hermes agent, switch inference with
 * `inference set`, then verify the switch is visible in the gateway route, the
 * Hermes config file and the host registry, and that the in-sandbox API still
 * answers health and chat requests.
 *
 * Skipped unless shouldRunLiveE2EScenarios() returns true. Requires the
 * NVIDIA_INFERENCE_API_KEY secret and a working Docker daemon on the host.
 */
test.skipIf(!shouldRunLiveE2EScenarios())(
"Hermes inference set updates route/config and preserves live runtime",
{ timeout: TIMEOUT_MS },
async ({ artifacts, cleanup, host, sandbox, secrets }) => {
const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
// Record which scenario ran and with which switch target.
await artifacts.writeJson("scenario.json", {
id: "hermes-inference-switch",
legacySource: "test/e2e/test-hermes-inference-switch.sh",
boundary: "install.sh + Hermes sandbox + inference set + in-sandbox health/chat probes",
sandboxName: SANDBOX_NAME,
switchProvider: SWITCH_PROVIDER,
switchModel: SWITCH_MODEL,
switchApi: SWITCH_API,
});

// Teardown registered before anything is created: first the CLI destroy, then a direct
// openshell delete. Each is wrapped in bestEffort, so a failure of either is ignored.
cleanup.add("destroy Hermes inference switch sandbox", async () => {
await bestEffort(() =>
host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
artifactName: "cleanup-nemoclaw-destroy",
env: env(),
timeoutMs: 120_000,
}),
);
await bestEffort(() =>
sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
artifactName: "cleanup-openshell-delete",
env: env(),
timeoutMs: 60_000,
}),
);
});

// The same two best-effort removals run up front, presumably to clear a sandbox left
// behind by an earlier run.
await bestEffort(() =>
host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
artifactName: "pre-cleanup-destroy",
env: env(),
timeoutMs: 120_000,
}),
);
await bestEffort(() =>
sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
artifactName: "pre-cleanup-delete",
env: env(),
timeoutMs: 60_000,
}),
);

// Fail early when `docker info` does not exit 0.
const docker = await host.command("docker", ["info"], {
artifactName: "docker-info",
env: buildAvailabilityProbeEnv(),
timeoutMs: 30_000,
});
expect(docker.exitCode, resultText(docker)).toBe(0);

// Run install.sh up to INSTALL_ATTEMPTS times. A retry happens only when the failure is
// classified by isTransientProviderValidationFailure; the wait grows by 10 s per attempt.
// Any other failure, or the last attempt, falls through to the exit-code assertion below.
let install: ShellProbeResult | undefined;
for (let attempt = 1; attempt <= INSTALL_ATTEMPTS; attempt += 1) {
install = await host.command(
"bash",
["install.sh", "--non-interactive", "--yes-i-accept-third-party-software"],
{
artifactName: attempt === 1 ? "install-hermes" : `install-hermes-attempt-${attempt}`,
cwd: REPO_ROOT,
env: env(apiKey),
redactionValues: [apiKey],
timeoutMs: 25 * 60_000,
},
);
if (install.exitCode === 0) break;
if (isTransientProviderValidationFailure(install) && attempt < INSTALL_ATTEMPTS) {
await new Promise((resolve) => setTimeout(resolve, 10_000 * attempt));
continue;
}
break;
}
expect(install, "install command must run").toBeDefined();
expect(install?.exitCode, resultText(install as ShellProbeResult)).toBe(0);

// Snapshot taken before the switch: the PID of the first process whose `ps` line mentions
// both "hermes" and "gateway run", and the SHA-256 of the Hermes .env file.
const pidBefore = await sandbox.execShell(
SANDBOX_NAME,
trustedSandboxShellScript(
"ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
),
{ artifactName: "pid-before", env: env(), timeoutMs: 30_000 },
);
const envHashBefore = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
artifactName: "env-hash-before",
env: env(),
timeoutMs: 30_000,
});

// The operation under test.
const switched = await host.command(
"node",
[CLI, "inference", "set", "--provider", SWITCH_PROVIDER, "--model", SWITCH_MODEL],
{
artifactName: "hermes-inference-set",
env: env(apiKey),
redactionValues: [apiKey],
timeoutMs: 180_000,
},
);
expect(switched.exitCode, resultText(switched)).toBe(0);

// The gateway process must not have been restarted: same PID before and after.
// NOTE(review): the comparison is skipped when either snapshot is empty, so a gateway
// that disappeared during the switch is not caught here — confirm that relying on the
// health probe below for that case is intended.
const pidAfter = await sandbox.execShell(
SANDBOX_NAME,
trustedSandboxShellScript(
"ps -eo pid=,comm=,args= | awk '$0 ~ /hermes/ && $0 ~ /gateway run/ { print $1; exit }'",
),
{ artifactName: "pid-after", env: env(), timeoutMs: 30_000 },
);
if (pidBefore.stdout.trim() && pidAfter.stdout.trim())
expect(pidAfter.stdout.trim()).toBe(pidBefore.stdout.trim());

// The in-sandbox API on port 8642 must answer /health successfully with "ok" in the output.
const health = await sandbox.exec(
SANDBOX_NAME,
["curl", "-sf", "--max-time", "10", "http://localhost:8642/health"],
{ artifactName: "hermes-health-after-switch", env: env(), timeoutMs: 30_000 },
);
expect(health.exitCode, resultText(health)).toBe(0);
expect(resultText(health)).toMatch(/ok/i);

// The gateway route must mention the new provider and model.
// NOTE(review): the gateway name is hard-coded to "nemoclaw" here, while env() honours an
// OPENSHELL_GATEWAY override — confirm the mismatch is intentional.
const route = await sandbox.openshell(["inference", "get", "-g", "nemoclaw"], {
artifactName: "openshell-inference-route",
env: env(),
timeoutMs: 30_000,
});
expect(route.exitCode, resultText(route)).toBe(0);
expect(resultText(route)).toContain(SWITCH_PROVIDER);
expect(resultText(route)).toContain(SWITCH_MODEL);

// Hermes config: the model block must name the new model with provider "custom" and the
// inference.local base URL (no "/v1" suffix for the anthropic-messages API), and the file
// must not contain a bare top-level `models:` line.
const config = await sandbox.exec(SANDBOX_NAME, ["cat", "/sandbox/.hermes/config.yaml"], {
artifactName: "hermes-config-yaml",
env: env(),
redactionValues: [apiKey],
timeoutMs: 30_000,
});
expect(config.exitCode, resultText(config)).toBe(0);
const model = parseHermesModelBlock(config.stdout);
expect(model.default).toBe(SWITCH_MODEL);
expect(model.provider).toBe("custom");
expect(model.base_url).toBe(
SWITCH_API === "anthropic-messages"
? "https://inference.local"
: "https://inference.local/v1",
);
expect(config.stdout).not.toMatch(/^models:\s*$/mu);

// The .env file must be unchanged by the switch. Compared only when the before-hash was
// captured; only the digest field of the sha256sum output is compared.
const envHashAfter = await sandbox.exec(SANDBOX_NAME, ["sha256sum", "/sandbox/.hermes/.env"], {
artifactName: "env-hash-after",
env: env(),
timeoutMs: 30_000,
});
if (envHashBefore.stdout.trim())
expect(envHashAfter.stdout.split(/\s+/u)[0]).toBe(envHashBefore.stdout.split(/\s+/u)[0]);

// Host-side registry (~/.nemoclaw/sandboxes.json) must record the agent, provider and model.
const registry = JSON.parse(
fs.readFileSync(path.join(os.homedir(), ".nemoclaw", "sandboxes.json"), "utf8"),
);
expect(registry.sandboxes?.[SANDBOX_NAME]?.agent).toBe("hermes");
expect(registry.sandboxes?.[SANDBOX_NAME]?.provider).toBe(SWITCH_PROVIDER);
expect(registry.sandboxes?.[SANDBOX_NAME]?.model).toBe(SWITCH_MODEL);

// End-to-end chat probe from inside the sandbox. The script sources the Hermes .env (when
// present) with auto-export on so API_SERVER_KEY is available as the bearer token, then
// POSTs the payload. Single quotes in the JSON are escaped for the single-quoted -d argument.
const payload = JSON.stringify({
model: SWITCH_MODEL,
messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
max_tokens: 100,
});
const chat = await sandbox.execShell(
SANDBOX_NAME,
trustedSandboxShellScript(
`set -a; [ ! -f /sandbox/.hermes/.env ] || . /sandbox/.hermes/.env; set +a; curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer \${API_SERVER_KEY:-}" -d '${payload.replace(/'/gu, `'\\''`)}'`,
),
{
artifactName: "hermes-api-chat-after-switch",
env: env(),
redactionValues: [apiKey],
timeoutMs: 150_000,
},
);
expect(chat.exitCode, resultText(chat)).toBe(0);
// chatContent falls back to the reasoning fields when `content` is empty.
expect(chatContent(chat.stdout)).toMatch(/PONG/i);
},
);