diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index 9522b7471d..0185327e01 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -742,6 +742,64 @@ jobs:
           if-no-files-found: ignore
           retention-days: 14
 
+  gpu-e2e-vitest:
+    needs: generate-matrix
+    if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',gpu-e2e-vitest,') || contains(format(',{0},', inputs.scenarios), ',gpu-e2e,') }}
+    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
+    timeout-minutes: 90 # job cap; the Vitest test itself allows 75 minutes
+    env:
+      FREE_STANDING_VITEST_JOB: "1"
+      FREE_STANDING_SCENARIO_ID: "gpu-e2e"
+      E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/gpu-e2e
+      NEMOCLAW_CLI_BIN: ${{ github.workspace }}/bin/nemoclaw.js
+      NEMOCLAW_RUN_E2E_SCENARIOS: "1"
+      NEMOCLAW_NON_INTERACTIVE: "1"
+      NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
+      NEMOCLAW_PROVIDER: "ollama"
+      NEMOCLAW_SANDBOX_NAME: "e2e-gpu-ollama"
+      OPENSHELL_GATEWAY: "nemoclaw"
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+        with:
+          persist-credentials: false # do not keep the checkout token in the local git config
+      - name: Set up Node
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
+        with:
+          node-version: 22
+          cache: npm
+      - name: Install root dependencies
+        run: npm ci --ignore-scripts
+      - name: Build CLI
+        run: npm run build:cli
+      - name: Install OpenShell CLI
+        run: bash scripts/install-openshell.sh
+      - name: Run GPU Ollama live Vitest test
+        run: |
+          set -euo pipefail
+          export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"
+          if command -v openshell >/dev/null 2>&1; then
+            OPENSHELL_BIN="$(command -v openshell)"
+          elif [ -x "$HOME/.local/bin/openshell" ]; then
+            OPENSHELL_BIN="$HOME/.local/bin/openshell"
+          else
+            echo "::error::OpenShell CLI not found after install"
+            ls -la /usr/local/bin/openshell "$HOME/.local/bin/openshell" 2>&1 || true
+            exit 1
+          fi
+          export OPENSHELL_BIN
+          echo "Using OPENSHELL_BIN=$OPENSHELL_BIN"
+          "$OPENSHELL_BIN" --version
+          npx vitest run --project e2e-scenarios-live test/e2e-scenario/live/gpu-e2e.test.ts --silent=false --reporter=default
+      - name: Upload GPU E2E artifacts
+        if: always() # upload whatever was captured, including after a failed test step
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: e2e-vitest-scenarios-gpu-e2e
+          path: e2e-artifacts/vitest/gpu-e2e/
+          include-hidden-files: false
+          if-no-files-found: ignore
+          retention-days: 14
+
   hermes-inference-switch-vitest:
     needs: generate-matrix
     if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',hermes-inference-switch-vitest,') || contains(format(',{0},', inputs.scenarios), ',hermes-inference-switch,') }}
@@ -4015,6 +4073,7 @@ jobs:
         openclaw-skill-cli-vitest,
         inference-routing-vitest,
         cloud-inference-vitest,
+        gpu-e2e-vitest,
         hermes-inference-switch-vitest,
         brave-search-vitest,
 
diff --git a/test/e2e-scenario/live/gpu-e2e-helpers.ts b/test/e2e-scenario/live/gpu-e2e-helpers.ts
new file mode 100644
index 0000000000..3661ef1aa1
--- /dev/null
+++ b/test/e2e-scenario/live/gpu-e2e-helpers.ts
@@ -0,0 +1,248 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import fs from "node:fs";
+import path from "node:path";
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import type { HostCliClient } from "../fixtures/clients/host.ts";
+import { resultText } from "../fixtures/clients/index.ts";
+import { type SandboxClient, validateSandboxName } from "../fixtures/clients/sandbox.ts";
+import { expect } from "../fixtures/e2e-test.ts";
+import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
+
+export const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); // repository root
+export const CLI = path.join(REPO_ROOT, "bin", "nemoclaw.js"); // executed with `node`
+export const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-gpu-ollama"; // env wins
+validateSandboxName(SANDBOX_NAME); // checked once, at module load
+export const PROXY_PORT = tcpPort(process.env.NEMOCLAW_OLLAMA_PROXY_PORT, "11435"); // a string
+
+/** Validates a decimal TCP port (1-65535) taken from `value`, else `fallback`; throws otherwise. */
+function tcpPort(value: string | undefined, fallback: string): string {
+  const candidate = value ?? fallback;
+  const port = /^[1-9][0-9]*$/u.test(candidate) ? Number.parseInt(candidate, 10) : Number.NaN;
+  if (!(port >= 1 && port <= 65_535)) throw new Error(`invalid TCP port: ${candidate}`);
+  return candidate;
+}
+
+/** Child-process env: availability-probe env, then scenario defaults, then `extra` overrides. */
+export function env(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
+  const scenarioDefaults: NodeJS.ProcessEnv = {
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
+    NEMOCLAW_NON_INTERACTIVE: "1",
+    NEMOCLAW_PROVIDER: "ollama",
+    NEMOCLAW_OLLAMA_PROXY_PORT: PROXY_PORT,
+    NEMOCLAW_RECREATE_SANDBOX: "1",
+    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
+    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
+  };
+  return { ...buildAvailabilityProbeEnv(), ...scenarioDefaults, ...extra };
+}
+
+/**
+ * Duck-typed guard for command results: accepts any non-null object that has an
+ * `exitCode` property holding a number or `null`. No other field is inspected.
+ */
+function isShellProbeResult(value: unknown): value is ShellProbeResult {
+  if (typeof value !== "object" || value === null || !("exitCode" in value)) return false;
+  const { exitCode } = value as { exitCode?: unknown };
+  return typeof exitCode === "number" || exitCode === null;
+}
+
+/**
+ * Runs a cleanup step without letting it fail the caller: a thrown error or a
+ * non-zero shell exit code is reported through `console.warn` and then ignored.
+ */
+export async function bestEffort(label: string, run: () => Promise<unknown>): Promise<void> {
+  const warn = (detail: string): void => console.warn(`[gpu-e2e cleanup] ${label} ${detail}`);
+  try {
+    const outcome = await run();
+    if (!isShellProbeResult(outcome) || outcome.exitCode === 0) return;
+    warn(`exited ${String(outcome.exitCode)}: ${resultText(outcome)}`);
+  } catch (error) {
+    warn(`failed: ${error instanceof Error ? error.message : String(error)}`);
+  }
+}
+
+/** Path to the Ollama auth-proxy token under `$HOME/.nemoclaw`; throws if HOME is unset or empty. */
+export function ollamaProxyTokenFile(): string {
+  if (!process.env.HOME) throw new Error("HOME environment variable is required");
+  return path.join(process.env.HOME, ".nemoclaw", "ollama-proxy-token");
+}
+
+export function readTokenFileChecked(tokenFile: string): { mode: string; token: string } {
+  const fd = fs.openSync(tokenFile, "r"); // stat and read share this one descriptor
+  try {
+    const permissionBits = fs.fstatSync(fd).mode & 0o777; // keep only the rwx permission bits
+    return { mode: permissionBits.toString(8), token: fs.readFileSync(fd, "utf8").trim() };
+  } finally {
+    fs.closeSync(fd); // released even when the stat or the read throws
+  }
+}
+
+/** Trimmed first non-blank of choices[0]'s content, reasoning_content, reasoning, or text; else "". */
+export function chatContent(raw: string): string {
+  // A body can parse to `null`; optional chaining reports that as "no content", not a TypeError.
+  const parsed = JSON.parse(raw) as null | {
+    choices?: Array<null | { message?: Record<string, unknown> | null; text?: unknown }>;
+  };
+  const choice = parsed?.choices?.[0];
+  const message: Record<string, unknown> = choice?.message ?? {};
+  const candidates = [message.content, message.reasoning_content, message.reasoning, choice?.text];
+  const isText = (v: unknown): v is string => typeof v === "string" && v.trim() !== "";
+  return candidates.find(isText)?.trim() ?? "";
+}
+
+/**
+ * Best-effort teardown: NemoClaw sandbox, OpenShell sandbox, the gateway named by
+ * OPENSHELL_GATEWAY (same default as `env()`), then host Ollama/auth-proxy processes.
+ */
+export async function cleanupGpu(host: HostCliClient, sandbox: SandboxClient): Promise<void> {
+  const gatewayArgs = ["gateway", "destroy", "-g", process.env.OPENSHELL_GATEWAY ?? "nemoclaw"];
+  await bestEffort("destroy GPU sandbox", () =>
+    host.command("node", [CLI, SANDBOX_NAME, "destroy", "--yes"], {
+      artifactName: "cleanup-destroy-gpu",
+      env: env(),
+      timeoutMs: 120_000,
+    }),
+  );
+  const openshellSteps: Array<[label: string, artifactName: string, args: string[]]> = [
+    ["delete OpenShell sandbox", "cleanup-delete-gpu", ["sandbox", "delete", SANDBOX_NAME]],
+    ["destroy OpenShell gateway", "cleanup-gateway-destroy-gpu", gatewayArgs],
+  ];
+  for (const [label, artifactName, args] of openshellSteps) {
+    await bestEffort(label, () =>
+      sandbox.openshell(args, { artifactName, env: env(), timeoutMs: 60_000 }),
+    );
+  }
+  await bestEffort("stop Ollama processes", () => cleanupOllama(host, "cleanup-ollama-processes"));
+}
+
+/** Stops host Ollama: both systemd units, then stray `ollama serve` and auth-proxy processes. */
+export async function cleanupOllama(
+  host: HostCliClient,
+  artifactName: string,
+): Promise<ShellProbeResult> {
+  // Each stop tolerates failure so a missing unit or process never fails the cleanup script.
+  const quiet = (command: string): string => `${command} 2>/dev/null || true`;
+  const stops = ["systemctl --user stop ollama", "systemctl stop ollama"].map(quiet);
+  const kills = ["pkill -f '[o]llama serve'", "pkill -f '[o]llama-auth-proxy'"].map(quiet);
+  const script = [...stops, ...kills].join("; ");
+  const options = { artifactName, env: env(), timeoutMs: 30_000 };
+  return await host.command("bash", ["-lc", script], options);
+}
+
+/**
+ * Requires a passing `nvidia-smi` probe: hard failure on GitHub Actions, test skip elsewhere.
+ */
+export function assertNvidiaAvailable(
+  result: ShellProbeResult,
+  skip: (note?: string) => never,
+): void {
+  if (result.exitCode === 0) return;
+  if (process.env.GITHUB_ACTIONS === "true") {
+    throw new Error(`GPU runner must provide nvidia-smi: ${resultText(result)}`);
+  }
+  skip(`GPU runner required: ${resultText(result)}`);
+}
+
+/**
+ * Installs Ollama with the official installer when `ollama` is not already on PATH.
+ * `pipefail` makes a failed download fail the step: without it `sh` reads an empty
+ * script, exits 0, and the missing binary only surfaces later in the scenario.
+ */
+export async function ensureOllama(host: HostCliClient): Promise<void> {
+  const probe = await host.command("bash", ["-lc", "command -v ollama"], {
+    artifactName: "command-v-ollama",
+    env: env(),
+    timeoutMs: 30_000,
+  });
+  if (probe.exitCode === 0) return;
+  // Mirrors the legacy live GPU user path by exercising Ollama's official installer
+  // before secrets are passed.
+  const install = await host.command(
+    "bash",
+    ["-lc", "set -o pipefail; curl -fsSL https://ollama.com/install.sh | sh"],
+    { artifactName: "install-ollama", env: env(), timeoutMs: 10 * 60_000 },
+  );
+  // Attach the captured output so an installer failure is diagnosable from the assertion.
+  expect(install.exitCode, resultText(install)).toBe(0);
+}
+
+export function assertGpuInstallProofs(log: string): void {
+  expect(log).toContain("GPU proof passed: nvidia-smi when available");
+  expect(log).toContain("GPU proof passed: /proc/<pid>/task/<tid>/comm write");
+  expect(log).toContain("GPU proof passed: cuInit(0) via libcuda.so.1");
+  if (!log.includes("Docker GPU mode selected")) return; // Docker GPU mode adds one more proof
+  expect(log).toContain("GPU sandbox runtime reached local inference");
+}
+
+/** Probes GET /api/tags on the auth proxy; the result's stdout is the HTTP status code curl saw. */
+export async function proxyStatus(
+  host: HostCliClient,
+  token?: string,
+  artifactName = "proxy-status",
+): Promise<ShellProbeResult> {
+  const auth = token ? ["-H", `Authorization: Bearer ${token}`] : [];
+  const args = ["-s", "-o", "/dev/null", "-w", "%{http_code}", ...auth];
+  return await host.command("curl", [...args, `http://127.0.0.1:${PROXY_PORT}/api/tags`], {
+    artifactName,
+    env: env(),
+    redactionValues: token ? [token] : undefined,
+    timeoutMs: 30_000,
+  });
+}
+
+/**
+ * Kills the running auth proxy, verifies the port went quiet, restarts the proxy with the given
+ * token ($1 = port, $2 = proxy script) and polls until it answers instead of trusting one sleep.
+ */
+export async function restartProxy(host: HostCliClient, token: string): Promise<ShellProbeResult> {
+  const proxyScript = path.join(REPO_ROOT, "scripts", "ollama-auth-proxy.js");
+  const script = `set -euo pipefail
+token="\${NEMOCLAW_GPU_E2E_PROXY_TOKEN:?missing proxy token}"
+proxy_pid="$(lsof -tiTCP:"$1" -sTCP:LISTEN 2>/dev/null | head -n1 || true)"
+if [ -n "$proxy_pid" ]; then
+  if ! ps -p "$proxy_pid" -o args= | grep -q '[o]llama-auth-proxy'; then
+    echo "port $1 is not owned by ollama-auth-proxy (pid $proxy_pid)" >&2
+    exit 1
+  fi
+  kill "$proxy_pid" 2>/dev/null || true
+else
+  pkill -f '[o]llama-auth-proxy' 2>/dev/null || true
+fi
+sleep 2
+if curl -s -o /dev/null -w '%{http_code}' --connect-timeout 2 "http://127.0.0.1:$1/api/tags" 2>/dev/null | grep -Eq '^[1-9][0-9]{2}$'; then
+  echo 'proxy still alive after kill' >&2
+  exit 1
+fi
+OLLAMA_PROXY_TOKEN="$token" OLLAMA_PROXY_PORT="$1" OLLAMA_BACKEND_PORT=11434 node "$2" >/tmp/nemoclaw-gpu-e2e-restarted-proxy.log 2>&1 &
+for _ in 1 2 3 4 5 6 7 8 9 10; do
+  sleep 1
+  status="$(curl -s -o /dev/null -w '%{http_code}' --max-time 2 -H "Authorization: Bearer $token" "http://127.0.0.1:$1/api/tags" || true)"
+  if [ "$status" = 200 ]; then break; fi
+done
+printf '%s' "$status"`;
+  return await host.command("bash", ["-lc", script, "restart-proxy", PROXY_PORT, proxyScript], {
+    artifactName: "proxy-restart-from-token",
+    env: env({ NEMOCLAW_GPU_E2E_PROXY_TOKEN: token }),
+    redactionValues: [token],
+    timeoutMs: 60_000,
+  });
+}
+
+/** Returns NEMOCLAW_MODEL when non-empty, else the first model Ollama lists ("" when it has none). */
+export async function detectOllamaModel(host: HostCliClient): Promise<string> {
+  const pinned = process.env.NEMOCLAW_MODEL;
+  if (pinned) return pinned;
+  // Query the Ollama API on port 11434 directly and let python print the first
+  // model name, or an empty line when the list is empty.
+  const firstModelScript =
+    'curl -sf http://127.0.0.1:11434/api/tags | python3 -c \'import json,sys; m=json.load(sys.stdin).get("models",[]); print(m[0]["name"] if m else "")\'';
+  const listed = await host.command("bash", ["-lc", firstModelScript], {
+    artifactName: "detect-ollama-model",
+    env: env(),
+    timeoutMs: 30_000,
+  });
+  return listed.stdout.trim();
+}
diff --git a/test/e2e-scenario/live/gpu-e2e.test.ts b/test/e2e-scenario/live/gpu-e2e.test.ts
new file mode 100644
index 0000000000..527fe09123
--- /dev/null
+++ b/test/e2e-scenario/live/gpu-e2e.test.ts
@@ -0,0 +1,165 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/** Live Vitest replacement for test/e2e/test-gpu-e2e.sh. */
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import { resultText } from "../fixtures/clients/index.ts";
+import { trustedSandboxShellScript } from "../fixtures/clients/sandbox.ts";
+import { expect, test } from "../fixtures/e2e-test.ts";
+import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
+import {
+  assertGpuInstallProofs,
+  assertNvidiaAvailable,
+  CLI,
+  chatContent,
+  cleanupGpu,
+  cleanupOllama,
+  detectOllamaModel,
+  ensureOllama,
+  env,
+  ollamaProxyTokenFile,
+  PROXY_PORT,
+  proxyStatus,
+  REPO_ROOT,
+  readTokenFileChecked,
+  restartProxy,
+  SANDBOX_NAME,
+} from "./gpu-e2e-helpers.ts";
+
+const TIMEOUT_MS = 75 * 60_000; // 75 minutes; the CI job around it is capped at 90
+
+test.skipIf(!shouldRunLiveE2EScenarios())(
+  "GPU Ollama onboard enables CUDA, auth proxy, and sandbox inference",
+  { timeout: TIMEOUT_MS },
+  async ({ artifacts, cleanup, host, sandbox, skip }) => {
+    await artifacts.writeJson("scenario.json", {
+      id: "gpu-e2e",
+      legacySource: "test/e2e/test-gpu-e2e.sh",
+      boundary:
+        "GPU host + install.sh Ollama provider + OpenShell sandbox + auth proxy + inference.local",
+      remoteInstallerBoundary:
+        "The official Ollama installer compatibility path runs before proxy tokens are read; the workflow uses a read-only checkout token and no explicit repository secrets. Replace with a pinned package once the GPU image provides a stable install source.",
+      sandboxName: SANDBOX_NAME,
+      delegatedLegacyContracts: [
+        "Phase 11 shell retirement decides whether uninstall --delete-models remains a separate cleanup lane",
+        "The #5468 OpenClaw TUI compaction guard remains in the retained legacy shell until a TUI fixture exists",
+      ],
+    });
+
+    cleanup.add("destroy GPU Ollama sandbox", () => cleanupGpu(host, sandbox));
+    await cleanupGpu(host, sandbox); // pre-run teardown in case an earlier run left state behind
+
+    const docker = await host.command("docker", ["info"], {
+      artifactName: "docker-info",
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 30_000,
+    });
+    expect(docker.exitCode, resultText(docker)).toBe(0);
+    const nvidia = await host.command("nvidia-smi", [], {
+      artifactName: "nvidia-smi",
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 30_000,
+    });
+    assertNvidiaAvailable(nvidia, skip); // throws on GitHub Actions, skips elsewhere
+
+    await ensureOllama(host);
+    await cleanupOllama(host, "pre-cleanup-ollama"); // stop Ollama and auth proxy before install
+
+    const install = await host.command("bash", ["install.sh", "--non-interactive"], {
+      artifactName: "install-gpu-ollama",
+      cwd: REPO_ROOT,
+      env: env(),
+      timeoutMs: 45 * 60_000,
+    });
+    expect(install.exitCode, resultText(install)).toBe(0);
+    await artifacts.writeText("install-gpu-ollama.log", resultText(install));
+
+    const status = await host.command("node", [CLI, SANDBOX_NAME, "status"], {
+      artifactName: "status-gpu-ollama",
+      env: env(),
+      timeoutMs: 120_000,
+    });
+    expect(status.exitCode, resultText(status)).toBe(0);
+    expect(resultText(status)).toContain("Sandbox GPU: enabled");
+    expect(resultText(status)).toMatch(/CUDA verified|CUDA unverified|last CUDA proof failed/i);
+    expect(resultText(status)).not.toMatch(/last CUDA proof failed|CUDA unverified/i);
+
+    assertGpuInstallProofs(resultText(install)); // proofs are read from install.sh output
+    const route = await sandbox.openshell(["inference", "get"], {
+      artifactName: "openshell-inference-route",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+    expect(route.exitCode, resultText(route)).toBe(0);
+    expect(resultText(route)).toMatch(/ollama/i);
+
+    const tokenRecord = readTokenFileChecked(ollamaProxyTokenFile());
+    expect(tokenRecord.mode).toBe("600"); // owner read/write only
+    const token = tokenRecord.token;
+    expect(token).not.toBe("");
+
+    const proxyUnauth = await host.command(
+      "curl",
+      [
+        "-s",
+        "-o",
+        "/dev/null",
+        "-w",
+        "%{http_code}",
+        "-X",
+        "POST",
+        `http://127.0.0.1:${PROXY_PORT}/api/generate`,
+        "-d",
+        "{}",
+      ],
+      { artifactName: "proxy-unauth-generate-status", env: env(), timeoutMs: 30_000 },
+    );
+    expect(proxyUnauth.stdout.trim()).toBe("401"); // no Authorization header sent
+    expect(
+      (await proxyStatus(host, "wrong-token", "proxy-wrong-token-tags-status")).stdout.trim(),
+    ).toBe("401");
+    expect((await proxyStatus(host, token, "proxy-correct-token-tags-status")).stdout.trim()).toBe(
+      "200",
+    );
+    const restarted = await restartProxy(host, token);
+    expect(restarted.exitCode, resultText(restarted)).toBe(0);
+    expect(restarted.stdout.trim()).toBe("200"); // script prints the final HTTP status
+
+    const model = await detectOllamaModel(host);
+    expect(model).not.toBe("");
+    const payload = JSON.stringify({
+      model,
+      messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+      max_tokens: 200,
+    });
+    const direct = await host.command(
+      "curl",
+      [
+        "-s",
+        "--max-time",
+        "120",
+        "-X",
+        "POST",
+        "http://127.0.0.1:11434/v1/chat/completions",
+        "-H",
+        "Content-Type: application/json",
+        "-d",
+        payload,
+      ],
+      { artifactName: "direct-ollama-chat", env: env(), timeoutMs: 150_000 },
+    );
+    expect(direct.exitCode, resultText(direct)).toBe(0);
+    expect(chatContent(direct.stdout)).toMatch(/PONG/i); // host -> Ollama directly
+
+    const sandboxChat = await sandbox.execShell(
+      SANDBOX_NAME,
+      trustedSandboxShellScript(
+        `curl -skS --max-time 90 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d '${payload.replace(/'/gu, `'\\''`)}'`,
+      ),
+      { artifactName: "sandbox-inference-local-chat", env: env(), timeoutMs: 150_000 },
+    );
+    expect(sandboxChat.exitCode, resultText(sandboxChat)).toBe(0);
+    expect(chatContent(sandboxChat.stdout)).toMatch(/PONG/i); // sandbox -> inference.local
+  },
+);