diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index 9522b7471d..0185327e01 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -742,6 +742,64 @@ jobs:
           if-no-files-found: ignore
           retention-days: 14
 
+  gpu-e2e-vitest:
+    needs: generate-matrix
+    if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',gpu-e2e-vitest,') || contains(format(',{0},', inputs.scenarios), ',gpu-e2e,') }}
+    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
+    timeout-minutes: 90
+    env:
+      FREE_STANDING_VITEST_JOB: "1"
+      FREE_STANDING_SCENARIO_ID: "gpu-e2e"
+      E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/gpu-e2e
+      NEMOCLAW_CLI_BIN: ${{ github.workspace }}/bin/nemoclaw.js
+      NEMOCLAW_RUN_E2E_SCENARIOS: "1"
+      NEMOCLAW_NON_INTERACTIVE: "1"
+      NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
+      NEMOCLAW_PROVIDER: "ollama"
+      NEMOCLAW_SANDBOX_NAME: "e2e-gpu-ollama"
+      OPENSHELL_GATEWAY: "nemoclaw"
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+        with:
+          persist-credentials: false
+      - name: Set up Node
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
+        with:
+          node-version: 22
+          cache: npm
+      - name: Install root dependencies
+        run: npm ci --ignore-scripts
+      - name: Build CLI
+        run: npm run build:cli
+      - name: Install OpenShell CLI
+        run: bash scripts/install-openshell.sh
+      - name: Run GPU Ollama live Vitest test
+        run: |
+          set -euo pipefail
+          export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"
+          if command -v openshell >/dev/null 2>&1; then
+            OPENSHELL_BIN="$(command -v openshell)"
+          elif [ -x "$HOME/.local/bin/openshell" ]; then
+            OPENSHELL_BIN="$HOME/.local/bin/openshell"
+          else
+            echo "::error::OpenShell CLI not found after install"
+            ls -la /usr/local/bin/openshell "$HOME/.local/bin/openshell" 2>&1 || true
+            exit 1
+          fi
+          export OPENSHELL_BIN
+          echo "Using OPENSHELL_BIN=$OPENSHELL_BIN"
+          "$OPENSHELL_BIN" --version
+          npx vitest run --project e2e-scenarios-live test/e2e-scenario/live/gpu-e2e.test.ts --silent=false --reporter=default
+      - name: Upload GPU E2E artifacts
+        if: always()
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: e2e-vitest-scenarios-gpu-e2e
+          path: e2e-artifacts/vitest/gpu-e2e/
+          include-hidden-files: false
+          if-no-files-found: ignore
+          retention-days: 14
+
   hermes-inference-switch-vitest:
     needs: generate-matrix
     if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',hermes-inference-switch-vitest,') || contains(format(',{0},', inputs.scenarios), ',hermes-inference-switch,') }}
@@ -4015,6 +4073,7 @@ jobs:
         openclaw-skill-cli-vitest,
         inference-routing-vitest,
         cloud-inference-vitest,
+        gpu-e2e-vitest,
         hermes-inference-switch-vitest,
         brave-search-vitest,
diff --git a/test/e2e-scenario/live/gpu-e2e-helpers.ts b/test/e2e-scenario/live/gpu-e2e-helpers.ts
new file mode 100644
index 0000000000..3661ef1aa1
--- /dev/null
+++ b/test/e2e-scenario/live/gpu-e2e-helpers.ts
@@ -0,0 +1,329 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import fs from "node:fs";
+import path from "node:path";
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import type { HostCliClient } from "../fixtures/clients/host.ts";
+import { resultText } from "../fixtures/clients/index.ts";
+import { type SandboxClient, validateSandboxName } from "../fixtures/clients/sandbox.ts";
+import { expect } from "../fixtures/e2e-test.ts";
+import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
+
+export const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+export const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js");
+export const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-gpu-ollama";
+validateSandboxName(SANDBOX_NAME);
+export const PROXY_PORT = tcpPort(process.env.NEMOCLAW_OLLAMA_PROXY_PORT, "11435");
+// One gateway name for both env() and cleanup, so teardown targets the gateway the run used.
+const GATEWAY_NAME = process.env.OPENSHELL_GATEWAY ?? "nemoclaw";
+
+/**
+ * Validates a TCP port given as a decimal string and returns it unchanged.
+ *
+ * @param value - Candidate port; `fallback` is used when this is undefined.
+ * @param fallback - Default port string.
+ * @returns The validated port string.
+ * @throws Error when the string is not a decimal integer in 1..65535.
+ */
+function tcpPort(value: string | undefined, fallback: string): string {
+  const raw = value ?? fallback;
+  if (!/^[1-9][0-9]*$/u.test(raw)) throw new Error(`invalid TCP port: ${raw}`);
+  const port = Number.parseInt(raw, 10);
+  if (port < 1 || port > 65_535) throw new Error(`invalid TCP port: ${raw}`);
+  return raw;
+}
+
+/**
+ * Builds the environment passed to the commands in this scenario.
+ *
+ * @param extra - Entries applied last, so they override the defaults above them.
+ */
+export function env(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
+  return {
+    ...buildAvailabilityProbeEnv(),
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
+    NEMOCLAW_NON_INTERACTIVE: "1",
+    NEMOCLAW_PROVIDER: "ollama",
+    NEMOCLAW_OLLAMA_PROXY_PORT: PROXY_PORT,
+    NEMOCLAW_RECREATE_SANDBOX: "1",
+    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
+    OPENSHELL_GATEWAY: GATEWAY_NAME,
+    ...extra,
+  };
+}
+
+/** Type guard: true for an object whose `exitCode` is a number or null. */
+function isShellProbeResult(value: unknown): value is ShellProbeResult {
+  return (
+    typeof value === "object" &&
+    value !== null &&
+    "exitCode" in value &&
+    (typeof (value as { exitCode?: unknown }).exitCode === "number" ||
+      (value as { exitCode?: unknown }).exitCode === null)
+  );
+}
+
+/**
+ * Runs a cleanup step and logs a warning instead of propagating its failure.
+ *
+ * @param label - Step name used in the warning text.
+ * @param run - Cleanup action; a result with a non-zero `exitCode` is reported as well.
+ */
+export async function bestEffort(label: string, run: () => Promise<unknown>): Promise<void> {
+  try {
+    const result = await run();
+    if (isShellProbeResult(result) && result.exitCode !== 0) {
+      console.warn(
+        `[gpu-e2e cleanup] ${label} exited ${String(result.exitCode)}: ${resultText(result)}`,
+      );
+    }
+  } catch (error) {
+    console.warn(
+      `[gpu-e2e cleanup] ${label} failed: ${error instanceof Error ? error.message : String(error)}`,
+    );
+  }
+}
+
+/**
+ * Returns the Ollama proxy token path under `$HOME/.nemoclaw`.
+ *
+ * @throws Error when HOME is unset or empty.
+ */
+export function ollamaProxyTokenFile(): string {
+  const home = process.env.HOME;
+  if (!home) throw new Error("HOME environment variable is required");
+  return path.join(home, ".nemoclaw", "ollama-proxy-token");
+}
+
+/**
+ * Reads the permission bits and the token through one file descriptor.
+ *
+ * @returns `mode` as an octal string of the low nine permission bits, and the trimmed token.
+ */
+export function readTokenFileChecked(tokenFile: string): { mode: string; token: string } {
+  const fd = fs.openSync(tokenFile, "r");
+  try {
+    const stat = fs.fstatSync(fd);
+    return { mode: (stat.mode & 0o777).toString(8), token: fs.readFileSync(fd, "utf8").trim() };
+  } finally {
+    fs.closeSync(fd);
+  }
+}
+
+/**
+ * Extracts the first non-blank text from the first choice of a chat completion response.
+ *
+ * Candidates are tried in order: `message.content`, `message.reasoning_content`,
+ * `message.reasoning`, then `text`.
+ *
+ * @param raw - Response body.
+ * @returns The trimmed text, or "" when no candidate is a non-blank string.
+ * @throws Error when `raw` is not valid JSON; the message quotes the start of the body.
+ */
+export function chatContent(raw: string): string {
+  let parsed: { choices?: Array<{ message?: Record<string, unknown>; text?: unknown }> } | null;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (error) {
+    // A bare SyntaxError hides what the server actually returned (empty body, error page).
+    const reason = error instanceof Error ? error.message : String(error);
+    throw new Error(`chat response is not valid JSON (${reason}): ${raw.slice(0, 200)}`);
+  }
+  const choice = parsed?.choices?.[0];
+  const message = choice?.message ?? {};
+  return (
+    [message.content, message.reasoning_content, message.reasoning, choice?.text]
+      .find((value): value is string => typeof value === "string" && value.trim().length > 0)
+      ?.trim() ?? ""
+  );
+}
+
+/**
+ * Tears down what the scenario may have created: the sandbox, the OpenShell sandbox, the
+ * gateway, and host Ollama processes. Every step is best-effort so one failure does not stop
+ * the remaining steps.
+ */
+export async function cleanupGpu(host: HostCliClient, sandbox: SandboxClient): Promise<void> {
+  await bestEffort("destroy GPU sandbox", () =>
+    host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
+      artifactName: "cleanup-destroy-gpu",
+      env: env(),
+      timeoutMs: 120_000,
+    }),
+  );
+  await bestEffort("delete OpenShell sandbox", () =>
+    sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
+      artifactName: "cleanup-delete-gpu",
+      env: env(),
+      timeoutMs: 60_000,
+    }),
+  );
+  await bestEffort("destroy OpenShell gateway", () =>
+    sandbox.openshell(["gateway", "destroy", "-g", GATEWAY_NAME], {
+      artifactName: "cleanup-gateway-destroy-gpu",
+      env: env(),
+      timeoutMs: 60_000,
+    }),
+  );
+  await bestEffort("stop host Ollama processes", () =>
+    cleanupOllama(host, "cleanup-ollama-processes"),
+  );
+}
+
+/**
+ * Stops the Ollama services and any `ollama serve` / `ollama-auth-proxy` processes on the host.
+ * Each command ends in `|| true`, so an absent service or process is not an error.
+ */
+export async function cleanupOllama(
+  host: HostCliClient,
+  artifactName: string,
+): Promise<ShellProbeResult> {
+  return await host.command(
+    "bash",
+    [
+      "-lc",
+      "systemctl --user stop ollama 2>/dev/null || true; systemctl stop ollama 2>/dev/null || true; pkill -f '[o]llama serve' 2>/dev/null || true; pkill -f '[o]llama-auth-proxy' 2>/dev/null || true",
+    ],
+    { artifactName, env: env(), timeoutMs: 30_000 },
+  );
+}
+
+/**
+ * Gates the scenario on a successful `nvidia-smi` run.
+ *
+ * @param result - Result of running `nvidia-smi`.
+ * @param skip - Skip callback; it does not return.
+ * @throws Error when `nvidia-smi` failed and GITHUB_ACTIONS is "true"; otherwise a failure
+ *   skips the test.
+ */
+export function assertNvidiaAvailable(
+  result: ShellProbeResult,
+  skip: (note?: string) => never,
+): void {
+  if (result.exitCode === 0) return;
+  if (process.env.GITHUB_ACTIONS === "true") {
+    throw new Error(`GPU runner must provide nvidia-smi: ${resultText(result)}`);
+  }
+  skip(`GPU runner required: ${resultText(result)}`);
+}
+
+/** Installs Ollama with its official installer when `ollama` is not found on PATH. */
+export async function ensureOllama(host: HostCliClient): Promise<void> {
+  const ollamaExists = await host.command("bash", ["-lc", "command -v ollama"], {
+    artifactName: "command-v-ollama",
+    env: env(),
+    timeoutMs: 30_000,
+  });
+  if (ollamaExists.exitCode === 0) return;
+  const install = await host.command(
+    "bash",
+    [
+      "-lc",
+      // Mirrors the legacy live GPU user path by exercising Ollama's official installer before secrets are passed.
+      // pipefail: without it a failed download pipes nothing into sh, which exits 0.
+      "set -o pipefail; curl -fsSL https://ollama.com/install.sh | sh",
+    ],
+    { artifactName: "install-ollama", env: env(), timeoutMs: 10 * 60_000 },
+  );
+  expect(install.exitCode, resultText(install)).toBe(0);
+}
+
+/** Asserts that the installer log contains each GPU proof line. */
+export function assertGpuInstallProofs(log: string): void {
+  expect(log).toContain("GPU proof passed: nvidia-smi when available");
+  // NOTE(review): the empty path segments look like dropped placeholders - confirm this
+  // literal against the exact line the installer prints.
+  expect(log).toContain("GPU proof passed: /proc//task//comm write");
+  expect(log).toContain("GPU proof passed: cuInit(0) via libcuda.so.1");
+  if (log.includes("Docker GPU mode selected")) {
+    expect(log).toContain("GPU sandbox runtime reached local inference");
+  }
+}
+
+/**
+ * Requests `/api/tags` from the auth proxy; the result's stdout is the HTTP status code.
+ *
+ * @param token - Bearer token; when omitted or empty no Authorization header is sent.
+ * @param artifactName - Name under which the command output is recorded.
+ */
+export async function proxyStatus(
+  host: HostCliClient,
+  token?: string,
+  artifactName = "proxy-status",
+): Promise<ShellProbeResult> {
+  const args = ["-s", "-o", "/dev/null", "-w", "%{http_code}"];
+  if (token) args.push("-H", `Authorization: Bearer ${token}`);
+  args.push(`http://127.0.0.1:${PROXY_PORT}/api/tags`);
+  return await host.command("curl", args, {
+    artifactName,
+    env: env(),
+    redactionValues: token ? [token] : undefined,
+    timeoutMs: 30_000,
+  });
+}
+
+/**
+ * Kills the running auth proxy, verifies the port stopped answering, starts a new proxy with
+ * `token`, and prints the HTTP status of an authenticated `/api/tags` request.
+ *
+ * The token is passed through the environment rather than the argument list.
+ */
+export async function restartProxy(host: HostCliClient, token: string): Promise<ShellProbeResult> {
+  return await host.command(
+    "bash",
+    [
+      "-lc",
+      `set -euo pipefail
+token="\${NEMOCLAW_GPU_E2E_PROXY_TOKEN:?missing proxy token}"
+proxy_pid="$(lsof -tiTCP:"$1" -sTCP:LISTEN 2>/dev/null | head -n1 || true)"
+if [ -n "$proxy_pid" ]; then
+  if ! ps -p "$proxy_pid" -o args= | grep -q '[o]llama-auth-proxy'; then
+    echo "port $1 is not owned by ollama-auth-proxy (pid $proxy_pid)" >&2
+    exit 1
+  fi
+  kill "$proxy_pid" 2>/dev/null || true
+else
+  pkill -f '[o]llama-auth-proxy' 2>/dev/null || true
+fi
+sleep 2
+if curl -s -o /dev/null -w '%{http_code}' --connect-timeout 2 "http://127.0.0.1:$1/api/tags" 2>/dev/null | grep -Eq '^[1-9][0-9]{2}$'; then
+  echo 'proxy still alive after kill' >&2
+  exit 1
+fi
+OLLAMA_PROXY_TOKEN="$token" OLLAMA_PROXY_PORT="$1" OLLAMA_BACKEND_PORT=11434 node "$2" >/tmp/nemoclaw-gpu-e2e-restarted-proxy.log 2>&1 &
+sleep 2
+curl -s -o /dev/null -w '%{http_code}' -H "Authorization: Bearer $token" "http://127.0.0.1:$1/api/tags"`,
+      "restart-proxy", // $0
+      PROXY_PORT, // $1
+      path.join(REPO_ROOT, "scripts", "ollama-auth-proxy.js"), // $2
+    ],
+    {
+      artifactName: "proxy-restart-from-token",
+      env: env({ NEMOCLAW_GPU_E2E_PROXY_TOKEN: token }),
+      redactionValues: [token],
+      timeoutMs: 60_000,
+    },
+  );
+}
+
+/**
+ * Chooses the model for the chat probes: NEMOCLAW_MODEL when non-empty, otherwise the first
+ * model the local Ollama lists.
+ *
+ * @returns The model name; "" when the listing prints nothing.
+ */
+export async function detectOllamaModel(host: HostCliClient): Promise<string> {
+  const configured = process.env.NEMOCLAW_MODEL;
+  if (configured) return configured;
+  const listed = await host.command(
+    "bash",
+    [
+      "-lc",
+      'curl -sf http://127.0.0.1:11434/api/tags | python3 -c \'import json,sys; m=json.load(sys.stdin).get("models",[]); print(m[0]["name"] if m else "")\'',
+    ],
+    { artifactName: "detect-ollama-model", env: env(), timeoutMs: 30_000 },
+  );
+  return listed.stdout.trim();
+}
diff --git a/test/e2e-scenario/live/gpu-e2e.test.ts b/test/e2e-scenario/live/gpu-e2e.test.ts
new file mode 100644
index 0000000000..527fe09123
--- /dev/null
+++ b/test/e2e-scenario/live/gpu-e2e.test.ts
@@ -0,0 +1,165 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/** Live Vitest replacement for test/e2e/test-gpu-e2e.sh. */
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import { resultText } from "../fixtures/clients/index.ts";
+import { trustedSandboxShellScript } from "../fixtures/clients/sandbox.ts";
+import { expect, test } from "../fixtures/e2e-test.ts";
+import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
+import {
+  assertGpuInstallProofs,
+  assertNvidiaAvailable,
+  CLI,
+  chatContent,
+  cleanupGpu,
+  cleanupOllama,
+  detectOllamaModel,
+  ensureOllama,
+  env,
+  ollamaProxyTokenFile,
+  PROXY_PORT,
+  proxyStatus,
+  REPO_ROOT,
+  readTokenFileChecked,
+  restartProxy,
+  SANDBOX_NAME,
+} from "./gpu-e2e-helpers.ts";
+
+const TIMEOUT_MS = 75 * 60_000;
+
+test.skipIf(!shouldRunLiveE2EScenarios())(
+  "GPU Ollama onboard enables CUDA, auth proxy, and sandbox inference",
+  { timeout: TIMEOUT_MS },
+  async ({ artifacts, cleanup, host, sandbox, skip }) => {
+    await artifacts.writeJson("scenario.json", {
+      id: "gpu-e2e",
+      legacySource: "test/e2e/test-gpu-e2e.sh",
+      boundary:
+        "GPU host + install.sh Ollama provider + OpenShell sandbox + auth proxy + inference.local",
+      remoteInstallerBoundary:
+        "The official Ollama installer compatibility path runs before proxy tokens are read; the workflow uses a read-only checkout token and no explicit repository secrets. Replace with a pinned package once the GPU image provides a stable install source.",
+      sandboxName: SANDBOX_NAME,
+      delegatedLegacyContracts: [
+        "Phase 11 shell retirement decides whether uninstall --delete-models remains a separate cleanup lane",
+        "The #5468 OpenClaw TUI compaction guard remains in the retained legacy shell until a TUI fixture exists",
+      ],
+    });
+
+    cleanup.add("destroy GPU Ollama sandbox", () => cleanupGpu(host, sandbox));
+    await cleanupGpu(host, sandbox);
+
+    const docker = await host.command("docker", ["info"], {
+      artifactName: "docker-info",
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 30_000,
+    });
+    expect(docker.exitCode, resultText(docker)).toBe(0);
+    const nvidia = await host.command("nvidia-smi", [], {
+      artifactName: "nvidia-smi",
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 30_000,
+    });
+    assertNvidiaAvailable(nvidia, skip);
+
+    await ensureOllama(host);
+    await cleanupOllama(host, "pre-cleanup-ollama");
+
+    const install = await host.command("bash", ["install.sh", "--non-interactive"], {
+      artifactName: "install-gpu-ollama",
+      cwd: REPO_ROOT,
+      env: env(),
+      timeoutMs: 45 * 60_000,
+    });
+    expect(install.exitCode, resultText(install)).toBe(0);
+    await artifacts.writeText("install-gpu-ollama.log", resultText(install));
+
+    const status = await host.command("node", [CLI, SANDBOX_NAME, "status"], {
+      artifactName: "status-gpu-ollama",
+      env: env(),
+      timeoutMs: 120_000,
+    });
+    expect(status.exitCode, resultText(status)).toBe(0);
+    expect(resultText(status)).toContain("Sandbox GPU: enabled");
+    expect(resultText(status)).toMatch(/CUDA verified|CUDA unverified|last CUDA proof failed/i);
+    expect(resultText(status)).not.toMatch(/last CUDA proof failed|CUDA unverified/i);
+
+    assertGpuInstallProofs(resultText(install));
+    const route = await sandbox.openshell(["inference", "get"], {
+      artifactName: "openshell-inference-route",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+    expect(route.exitCode, resultText(route)).toBe(0);
+    expect(resultText(route)).toMatch(/ollama/i);
+
+    const tokenRecord = readTokenFileChecked(ollamaProxyTokenFile());
+    expect(tokenRecord.mode).toBe("600");
+    const token = tokenRecord.token;
+    expect(token).not.toBe("");
+
+    const proxyUnauth = await host.command(
+      "curl",
+      [
+        "-s",
+        "-o",
+        "/dev/null",
+        "-w",
+        "%{http_code}",
+        "-X",
+        "POST",
+        `http://127.0.0.1:${PROXY_PORT}/api/generate`,
+        "-d",
+        "{}",
+      ],
+      { artifactName: "proxy-unauth-generate-status", env: env(), timeoutMs: 30_000 },
+    );
+    expect(proxyUnauth.stdout.trim()).toBe("401");
+    expect(
+      (await proxyStatus(host, "wrong-token", "proxy-wrong-token-tags-status")).stdout.trim(),
+    ).toBe("401");
+    expect((await proxyStatus(host, token, "proxy-correct-token-tags-status")).stdout.trim()).toBe(
+      "200",
+    );
+    const restarted = await restartProxy(host, token);
+    expect(restarted.exitCode, resultText(restarted)).toBe(0);
+    expect(restarted.stdout.trim()).toBe("200");
+
+    const model = await detectOllamaModel(host);
+    expect(model).not.toBe("");
+    const payload = JSON.stringify({
+      model,
+      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+      max_tokens: 200,
+    });
+    const direct = await host.command(
+      "curl",
+      [
+        "-s",
+        "--max-time",
+        "120",
+        "-X",
+        "POST",
+        "http://127.0.0.1:11434/v1/chat/completions",
+        "-H",
+        "Content-Type: application/json",
+        "-d",
+        payload,
+      ],
+      { artifactName: "direct-ollama-chat", env: env(), timeoutMs: 150_000 },
+    );
+    expect(direct.exitCode, resultText(direct)).toBe(0);
+    expect(chatContent(direct.stdout)).toMatch(/PONG/i);
+
+    const sandboxChat = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        `curl -skS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${payload.replace(/'/gu, `'\\''`)}'`,
+      ),
+      { artifactName: "sandbox-inference-local-chat", env: env(), timeoutMs: 150_000 },
+    );
+    expect(sandboxChat.exitCode, resultText(sandboxChat)).toBe(0);
+    expect(chatContent(sandboxChat.stdout)).toMatch(/PONG/i);
+  },
+);