Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions .github/workflows/e2e-vitest-scenarios.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,53 @@ jobs:
if-no-files-found: ignore
retention-days: 14

# Free-standing job that runs the live GPU + Ollama Vitest scenario
# (test/e2e-scenario/live/gpu-e2e.test.ts) on a GPU runner and uploads its artifacts.
gpu-e2e-vitest:
needs: generate-matrix
# Run when no job/scenario filter was supplied, or when this job id / the `gpu-e2e`
# scenario id is listed. Wrapping the input in commas via format(',{0},', ...) makes
# contains() match whole comma-separated entries rather than substrings.
if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',gpu-e2e-vitest,') || contains(format(',{0},', inputs.scenarios), ',gpu-e2e,') }}
runs-on: linux-amd64-gpu-rtxpro6000-latest-1
# Job-level ceiling; the test itself declares a 75-minute Vitest timeout.
timeout-minutes: 90
env:
FREE_STANDING_VITEST_JOB: "1"
FREE_STANDING_SCENARIO_ID: "gpu-e2e"
# Same directory (relative to the workspace) that the upload step below publishes.
E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/gpu-e2e
NEMOCLAW_CLI_BIN: ${{ github.workspace }}/bin/nemoclaw.js
NEMOCLAW_RUN_E2E_SCENARIOS: "1"
NEMOCLAW_NON_INTERACTIVE: "1"
NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
NEMOCLAW_PROVIDER: "ollama"
# Matches the default sandbox name the test falls back to when this variable is unset.
NEMOCLAW_SANDBOX_NAME: "e2e-gpu-ollama"
OPENSHELL_GATEWAY: "nemoclaw"
steps:
# Actions are pinned to full commit SHAs; the trailing comment records the release tag.
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- name: Set up Node
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
with:
node-version: 22
cache: npm
- name: Install root dependencies
run: npm ci --ignore-scripts
- name: Build CLI
run: npm run build:cli
- name: Install OpenShell CLI
run: bash scripts/install-openshell.sh
# PATH is extended with the per-user bin directories before resolving `openshell`.
# The `|| true` keeps `set -e` from aborting when `openshell` is not found, in which
# case OPENSHELL_BIN is exported as an empty string.
- name: Run GPU Ollama live Vitest test
run: |
set -euo pipefail
export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"
export OPENSHELL_BIN="$(command -v openshell || true)"
npx vitest run --project e2e-scenarios-live test/e2e-scenario/live/gpu-e2e.test.ts --silent=false --reporter=default
# `if: always()` publishes artifacts even when the test step failed; a missing
# artifact directory is tolerated via `if-no-files-found: ignore`.
- name: Upload GPU E2E artifacts
if: always()
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: e2e-vitest-scenarios-gpu-e2e
path: e2e-artifacts/vitest/gpu-e2e/
include-hidden-files: false
if-no-files-found: ignore
retention-days: 14

issue-4434-tui-unreachable-inference-vitest:
needs: generate-matrix
if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',issue-4434-tui-unreachable-inference-vitest,') || contains(format(',{0},', inputs.scenarios), ',issue-4434-tui-unreachable-inference,') }}
Expand Down Expand Up @@ -3586,6 +3633,7 @@ jobs:
openclaw-skill-cli-vitest,
inference-routing-vitest,
cloud-inference-vitest,
gpu-e2e-vitest,
credential-sanitization-vitest,
credential-migration-vitest,
sessions-agents-cli-vitest,
Expand Down
337 changes: 337 additions & 0 deletions test/e2e-scenario/live/gpu-e2e.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,337 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

/** Live Vitest replacement for test/e2e/test-gpu-e2e.sh. */

import fs from "node:fs";
import path from "node:path";

import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
import { resultText } from "../fixtures/clients/index.ts";
import { trustedSandboxShellScript, validateSandboxName } from "../fixtures/clients/sandbox.ts";
import { expect, test } from "../fixtures/e2e-test.ts";
import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";

/** Repository root, three directory levels above this test file's directory. */
const REPO_ROOT = path.resolve(import.meta.dirname, "..", "..", "..");
/** CLI entry point that the scenario launches through `node`. */
const CLI = path.resolve(REPO_ROOT, "bin", "nemoclaw.js");
/** Sandbox under test; overridable through NEMOCLAW_SANDBOX_NAME. */
const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-gpu-ollama";
// Validate at module load, before the name is handed to any CLI or shell command.
validateSandboxName(SANDBOX_NAME);
/** Port of the Ollama auth proxy probed by the test; kept as a string for env/URL use. */
const PROXY_PORT = process.env.NEMOCLAW_OLLAMA_PROXY_PORT ?? "11435";
/** Overall Vitest timeout for the scenario: 75 minutes, in milliseconds. */
const TIMEOUT_MS = 75 * 60 * 1_000;

/**
 * Builds the environment handed to every child process of this scenario.
 *
 * Layers are merged left to right, so later layers win on key collisions:
 * the availability-probe base environment, then the scenario's fixed
 * settings, then the caller's `extra` overrides.
 *
 * @param extra - Per-call overrides applied last; defaults to none.
 * @returns A fresh environment object.
 */
function env(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
  const probeBase = buildAvailabilityProbeEnv();
  const scenarioSettings: NodeJS.ProcessEnv = {
    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
    NEMOCLAW_NON_INTERACTIVE: "1",
    NEMOCLAW_PROVIDER: "ollama",
    NEMOCLAW_OLLAMA_PROXY_PORT: PROXY_PORT,
    NEMOCLAW_RECREATE_SANDBOX: "1",
    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
    // Honour an externally supplied gateway name, otherwise use "nemoclaw".
    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
  };
  return { ...probeBase, ...scenarioSettings, ...extra };
}

/**
 * Runs `run` and waits for it to settle, deliberately discarding any failure.
 *
 * Used for cleanup steps whose failure must not fail the test. Both a
 * rejected promise and a synchronous throw from `run` are swallowed.
 *
 * @param run - The action to attempt; it is invoked immediately.
 * @returns A promise that always resolves to `undefined`.
 */
async function bestEffort(run: () => Promise<unknown>): Promise<void> {
  // The async wrapper calls `run` synchronously and turns a synchronous throw
  // into a rejection, so a single `.catch` covers both failure modes.
  const attempt = (async () => run())();
  await attempt.catch(() => undefined);
}

/**
 * Reads a token file and reports its permission bits alongside its contents.
 *
 * The file is opened once and both the stat and the read go through the same
 * descriptor, so the reported mode belongs to the file that was actually read.
 *
 * @param tokenFile - Path of the token file.
 * @returns `mode` as an octal string of the low nine permission bits (for
 *   example "600") and `token` as the file's UTF-8 text with surrounding
 *   whitespace removed.
 * @throws If the file cannot be opened, stat-ed, or read.
 */
function readTokenFileChecked(tokenFile: string): { mode: string; token: string } {
  const handle = fs.openSync(tokenFile, "r");
  try {
    const permissionBits = fs.fstatSync(handle).mode & 0o777;
    const contents = fs.readFileSync(handle, { encoding: "utf8" });
    return { mode: permissionBits.toString(8), token: contents.trim() };
  } finally {
    // Always release the descriptor, including when stat or read throws.
    fs.closeSync(handle);
  }
}

/**
 * Extracts the reply text from a chat-completions response body.
 *
 * Only the first entry of `choices` is inspected. Candidate fields are tried
 * in order: `message.content`, `message.reasoning_content`,
 * `message.reasoning`, then the choice-level `text`. The first one holding a
 * non-blank string wins.
 *
 * @param raw - JSON text of the response body.
 * @returns The trimmed text of the first non-blank candidate, or "" when there
 *   is none (including a body of JSON `null`, a body without `choices`, or an
 *   empty `choices` array).
 * @throws SyntaxError when `raw` is not valid JSON.
 */
function chatContent(raw: string): string {
  // JSON.parse legitimately returns `null` for the literal "null"; admitting
  // that in the type forces the optional chaining below, which previously was
  // a plain property access and threw a TypeError on such a body.
  const parsed = JSON.parse(raw) as {
    choices?: Array<{ message?: Record<string, unknown>; text?: unknown }>;
  } | null;
  const choice = parsed?.choices?.[0];
  // `??` also covers an explicit `"message": null`.
  const message = choice?.message ?? {};
  const candidates: unknown[] = [
    message.content,
    message.reasoning_content,
    message.reasoning,
    choice?.text,
  ];
  for (const value of candidates) {
    if (typeof value === "string" && value.trim()) return value.trim();
  }
  return "";
}

/**
 * Live scenario: onboard the Ollama provider on a GPU host through install.sh,
 * then verify, in order, the sandbox GPU/CUDA status, the auth proxy's token
 * handling (including a restart from the persisted token), direct Ollama chat
 * on the host, and chat through `inference.local` from inside the sandbox.
 *
 * The whole test is skipped unless the live-scenario gate is enabled.
 */
test.skipIf(!shouldRunLiveE2EScenarios())(
  "GPU Ollama onboard enables CUDA, auth proxy, and sandbox inference",
  { timeout: TIMEOUT_MS },
  async ({ artifacts, cleanup, host, sandbox, skip }) => {
    /**
     * Best-effort removal of the sandbox (via the CLI and via OpenShell) and of
     * the "nemoclaw" gateway. Shared by the pre-run reset and the registered
     * cleanup; `artifactPrefix` keeps their artifact names distinct.
     */
    const removeSandboxAndGateway = async (
      artifactPrefix: "cleanup" | "pre-cleanup",
    ): Promise<void> => {
      await bestEffort(() =>
        host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
          artifactName: `${artifactPrefix}-destroy-gpu`,
          env: env(),
          timeoutMs: 120_000,
        }),
      );
      await bestEffort(() =>
        sandbox.openshell(["sandbox", "delete", SANDBOX_NAME], {
          artifactName: `${artifactPrefix}-delete-gpu`,
          env: env(),
          timeoutMs: 60_000,
        }),
      );
      await bestEffort(() =>
        sandbox.openshell(["gateway", "destroy", "-g", "nemoclaw"], {
          artifactName: `${artifactPrefix}-gateway-destroy-gpu`,
          env: env(),
          timeoutMs: 60_000,
        }),
      );
    };

    // curl flags that discard the body and print only the HTTP status code.
    const statusOnlyFlags = ["-s", "-o", "/dev/null", "-w", "%{http_code}"];

    await artifacts.writeJson("scenario.json", {
      id: "gpu-e2e",
      legacySource: "test/e2e/test-gpu-e2e.sh",
      boundary:
        "GPU host + install.sh Ollama provider + OpenShell sandbox + auth proxy + inference.local",
      sandboxName: SANDBOX_NAME,
    });

    // Register teardown before anything is created so it is in place even if a
    // later step fails.
    cleanup.add("destroy GPU Ollama sandbox", async () => {
      await removeSandboxAndGateway("cleanup");
      await bestEffort(() =>
        host.command(
          "bash",
          [
            "-lc",
            "pkill -f 'ollama serve' 2>/dev/null || true; pkill -f 'ollama-auth-proxy' 2>/dev/null || true",
          ],
          { artifactName: "cleanup-ollama-processes", env: env(), timeoutMs: 30_000 },
        ),
      );
    });

    // Start from a clean slate in case a previous run left state behind.
    await removeSandboxAndGateway("pre-cleanup");

    // Host prerequisites: Docker must work; a missing GPU skips instead of failing.
    const docker = await host.command("docker", ["info"], {
      artifactName: "docker-info",
      env: buildAvailabilityProbeEnv(),
      timeoutMs: 30_000,
    });
    expect(docker.exitCode, resultText(docker)).toBe(0);
    const nvidia = await host.command("nvidia-smi", [], {
      artifactName: "nvidia-smi",
      env: buildAvailabilityProbeEnv(),
      timeoutMs: 30_000,
    });
    // NOTE(review): `skip` is presumably expected to abort the test here — confirm against the fixture.
    if (nvidia.exitCode !== 0) skip(`GPU runner required: ${resultText(nvidia)}`);

    const ollamaExists = await host.command("bash", ["-lc", "command -v ollama"], {
      artifactName: "command-v-ollama",
      env: env(),
      timeoutMs: 30_000,
    });
    if (ollamaExists.exitCode !== 0) {
      const installOllama = await host.command(
        "bash",
        [
          "-lc",
          // Mirrors the legacy live GPU user path by exercising Ollama's
          // official installer before any repository/GitHub credentials are
          // provided to child processes.
          "curl -fsSL https://ollama.com/install.sh | sh",
        ],
        { artifactName: "install-ollama", env: env(), timeoutMs: 10 * 60_000 },
      );
      expect(installOllama.exitCode, resultText(installOllama)).toBe(0);
    }

    // Stop any running Ollama service/processes and auth proxy before onboarding;
    // every command tolerates "nothing to stop".
    await host.command(
      "bash",
      [
        "-lc",
        "systemctl --user stop ollama 2>/dev/null || true; systemctl stop ollama 2>/dev/null || true; pkill -f 'ollama serve' 2>/dev/null || true; pkill -f 'ollama-auth-proxy' 2>/dev/null || true",
      ],
      { artifactName: "pre-cleanup-ollama", env: env(), timeoutMs: 30_000 },
    );

    const install = await host.command("bash", ["install.sh", "--non-interactive"], {
      artifactName: "install-gpu-ollama",
      cwd: REPO_ROOT,
      env: env(),
      timeoutMs: 45 * 60_000,
    });
    const installLog = resultText(install);
    // Persist the log before asserting: a failed install is exactly when the
    // log is needed, and a failing assertion would otherwise skip this write.
    await artifacts.writeText("install-gpu-ollama.log", installLog);
    expect(install.exitCode, installLog).toBe(0);

    const status = await host.command("node", [CLI, SANDBOX_NAME, "status"], {
      artifactName: "status-gpu-ollama",
      env: env(),
      timeoutMs: 120_000,
    });
    expect(status.exitCode, resultText(status)).toBe(0);
    expect(resultText(status)).toContain("Sandbox GPU: enabled");
    // First require that some CUDA verdict is reported at all, then reject the
    // two negative verdicts; together this leaves only "CUDA verified".
    expect(resultText(status)).toMatch(/CUDA verified|CUDA unverified|last CUDA proof failed/i);
    expect(resultText(status)).not.toMatch(/last CUDA proof failed|CUDA unverified/i);

    expect(installLog).toContain("GPU proof passed: nvidia-smi when available");
    expect(installLog).toContain("GPU proof passed: cuInit(0) via libcuda.so.1");

    // The proxy token must exist, be non-empty, and be readable by its owner only.
    const tokenFile = path.join(process.env.HOME ?? "", ".nemoclaw", "ollama-proxy-token");
    const tokenRecord = readTokenFileChecked(tokenFile);
    expect(tokenRecord.mode).toBe("600");
    const token = tokenRecord.token;
    expect(token).not.toBe("");

    // Auth proxy: no token -> 401, wrong token -> 401, correct token -> 200.
    const proxyUnauth = await host.command(
      "curl",
      [
        ...statusOnlyFlags,
        "-X",
        "POST",
        `http://127.0.0.1:${PROXY_PORT}/api/generate`,
        "-d",
        "{}",
      ],
      { artifactName: "proxy-unauth-generate-status", env: env(), timeoutMs: 30_000 },
    );
    expect(proxyUnauth.stdout.trim()).toBe("401");
    const wrongToken = await host.command(
      "curl",
      [
        ...statusOnlyFlags,
        "-H",
        "Authorization: Bearer wrong-token",
        `http://127.0.0.1:${PROXY_PORT}/api/tags`,
      ],
      { artifactName: "proxy-wrong-token-tags-status", env: env(), timeoutMs: 30_000 },
    );
    expect(wrongToken.stdout.trim()).toBe("401");
    const correctToken = await host.command(
      "curl",
      [
        ...statusOnlyFlags,
        "-H",
        `Authorization: Bearer ${token}`,
        `http://127.0.0.1:${PROXY_PORT}/api/tags`,
      ],
      {
        artifactName: "proxy-correct-token-tags-status",
        env: env(),
        redactionValues: [token],
        timeoutMs: 30_000,
      },
    );
    expect(correctToken.stdout.trim()).toBe("200");

    // Kill the proxy, confirm the port no longer answers with any HTTP status,
    // relaunch it from the persisted token, and expect authenticated access to
    // work again. Token, port and script path are passed as positional
    // arguments ($1..$3) rather than spliced into the script text.
    const restartProxy = await host.command(
      "bash",
      [
        "-lc",
        `set -euo pipefail
pkill -f 'ollama-auth-proxy' 2>/dev/null || true
sleep 2
if curl -s -o /dev/null -w '%{http_code}' --connect-timeout 2 http://127.0.0.1:${PROXY_PORT}/api/tags 2>/dev/null | grep -Eq '^[1-9][0-9]{2}$'; then
echo 'proxy still alive after kill' >&2
exit 1
fi
OLLAMA_PROXY_TOKEN="$1" OLLAMA_PROXY_PORT="$2" OLLAMA_BACKEND_PORT=11434 node "$3" >/tmp/nemoclaw-gpu-e2e-restarted-proxy.log 2>&1 &
sleep 2
curl -s -o /dev/null -w '%{http_code}' -H "Authorization: Bearer $1" "http://127.0.0.1:$2/api/tags"`,
        "restart-proxy",
        token,
        PROXY_PORT,
        path.join(REPO_ROOT, "scripts", "ollama-auth-proxy.js"),
      ],
      {
        artifactName: "proxy-restart-from-token",
        env: env(),
        redactionValues: [token],
        timeoutMs: 60_000,
      },
    );
    expect(restartProxy.exitCode, resultText(restartProxy)).toBe(0);
    expect(restartProxy.stdout.trim()).toBe("200");

    // Use NEMOCLAW_MODEL when set to a non-empty value (hence `||`, not `??`);
    // otherwise take the first model Ollama reports on its backend port.
    const model =
      process.env.NEMOCLAW_MODEL ||
      (
        await host.command(
          "bash",
          [
            "-lc",
            'curl -sf http://127.0.0.1:11434/api/tags | python3 -c \'import json,sys; m=json.load(sys.stdin).get("models",[]); print(m[0]["name"] if m else "")\'',
          ],
          { artifactName: "detect-ollama-model", env: env(), timeoutMs: 30_000 },
        )
      ).stdout.trim();
    expect(model).not.toBe("");

    const payload = JSON.stringify({
      model,
      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
      max_tokens: 200,
    });

    // Host-side chat straight against the Ollama backend port.
    const direct = await host.command(
      "curl",
      [
        "-s",
        "--max-time",
        "120",
        "-X",
        "POST",
        "http://127.0.0.1:11434/v1/chat/completions",
        "-H",
        "Content-Type: application/json",
        "-d",
        payload,
      ],
      { artifactName: "direct-ollama-chat", env: env(), timeoutMs: 150_000 },
    );
    expect(direct.exitCode, resultText(direct)).toBe(0);
    expect(chatContent(direct.stdout)).toMatch(/PONG/i);

    // Same request from inside the sandbox via inference.local. The payload is
    // embedded in single quotes, so each single quote in it is rewritten as '\''.
    const sandboxChat = await sandbox.execShell(
      SANDBOX_NAME,
      trustedSandboxShellScript(
        `curl -skS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${payload.replace(/'/gu, `'\\''`)}'`,
      ),
      { artifactName: "sandbox-inference-local-chat", env: env(), timeoutMs: 150_000 },
    );
    expect(sandboxChat.exitCode, resultText(sandboxChat)).toBe(0);
    expect(chatContent(sandboxChat.stdout)).toMatch(/PONG/i);
  },
);