Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 80 additions & 3 deletions src/arc_llama/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
init_config_from_detection,
load_config,
)
from arc_llama.detect import detect_gpus, lspci_intel_gpus
from arc_llama.detect import detect_gpus, lspci_intel_gpus, render_nodes_in_dev
from arc_llama.models import (
add_local_model,
discover_ggufs,
Expand Down Expand Up @@ -208,12 +208,90 @@ def doctor(ctx: click.Context) -> None:
else:
console.print(" lspci shows no Intel display devices either.")

# DRM device nodes in /dev/dri/
console.print("\n /dev/dri/ render nodes:")
dev_dri = Path("/dev/dri")
if not dev_dri.exists():
console.print(
" [red]/dev/dri/ does not exist[/red] — no DRM device nodes "
"present in this environment."
)
console.print(
" In a container: pass the device through with --device=/dev/dri/")
else:
render_nodes = render_nodes_in_dev()
all_dri = sorted(dev_dri.iterdir()) if dev_dri.exists() else []
if not render_nodes:
console.print(
" [yellow]no renderD* nodes found[/yellow] (all entries: "
+ (", ".join(p.name for p in all_dri) or "none")
+ ")"
)
else:
for node in render_nodes:
try:
stat = node.stat()
mode = oct(stat.st_mode)[-3:]
except OSError:
mode = "???"
console.print(f" [green]{node}[/green] (mode={mode})")

# Cross-reference: driver bound but render node absent
_missing_render = [
g for g in gpus if g.driver in ("xe", "i915") and g.drm_render is None
]
if _missing_render:
console.print()
for g in _missing_render:
console.print(
f" [red]WARNING[/red]: {g.name} has `{g.driver}` driver bound "
f"but no DRM render node was created."
)
console.print(
" This is the most common cause of "
"\"No device of requested type available\" from SYCL.\n"
" Steps to resolve:\n"
" 1. Check render nodes: ls -la /dev/dri/\n"
" 2. Check driver init: dmesg | grep -E '(xe|i915|drm)' | tail -30\n"
" 3. In containers: ensure host /dev/dri is passed through "
"(--device=/dev/dri/ or --device=/dev/dri/renderD128)\n"
" 4. Add to groups: sudo usermod -aG render,video $USER (re-login)\n"
" 5. Reload driver: sudo modprobe -r xe && sudo modprobe xe"
)

# External tools
console.print("\n external tools:")
for tool in ("clinfo", "sycl-ls", "intel_gpu_top", "nvtop", "lspci"):
path = shutil.which(tool)
console.print(f" {tool:<14} {path or '— missing —'}")

# sycl-ls device enumeration (most useful signal for "can SYCL see the GPU?")
sycl_ls_bin = shutil.which("sycl-ls")
console.print("\n sycl-ls device enumeration:")
if not sycl_ls_bin:
console.print(" [yellow]sycl-ls not found — install Intel oneAPI Base Toolkit[/yellow]")
else:
try:
sl = subprocess.run(
[sycl_ls_bin], capture_output=True, text=True, timeout=10
)
output = sl.stdout.strip()
if output:
for line in output.splitlines():
console.print(f" {line}")
else:
console.print(
" [yellow](no output — SYCL sees no devices; "
"GPU/render-node access likely missing)[/yellow]"
)
if sl.returncode != 0:
console.print(f" [yellow]sycl-ls exited {sl.returncode}[/yellow]")
if sl.stderr.strip():
for line in sl.stderr.strip().splitlines()[:5]:
console.print(f" [dim]{line}[/dim]")
except subprocess.TimeoutExpired:
console.print(" [yellow]sycl-ls timed out[/yellow]")

# Permissions
console.print("\n user groups:")
try:
Expand All @@ -227,8 +305,7 @@ def doctor(ctx: click.Context) -> None:
console.print(f" {needed:<14} {marker}")
if "render" not in groups or "video" not in groups:
console.print(
" [yellow]→ add yourself with `sudo usermod -aG render,video $USER` "
"and re-login.[/yellow]"
" [yellow]→ sudo usermod -aG render,video $USER then log out and back in.[/yellow]"
)

# oneAPI
Expand Down
14 changes: 14 additions & 0 deletions src/arc_llama/detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ def _scan_pci() -> list[DetectedGPU]:
)
if driver is None:
gpu.notes.append("No kernel driver bound — install `xe` or `i915` modules.")
elif render is None:
gpu.notes.append(
f"driver '{driver}' bound but no render node found in sysfs "
"(DRM render node not created). "
"Check: ls /dev/dri/ and dmesg | grep -E '(xe|i915|drm)'"
)
found.append(gpu)
for i, g in enumerate(found):
g._index = i # type: ignore[attr-defined]
Expand Down Expand Up @@ -225,6 +231,14 @@ def detect_gpus(enrich: bool = True) -> list[DetectedGPU]:
return gpus


def render_nodes_in_dev() -> list[Path]:
"""Return renderD* device nodes present under /dev/dri/, sorted by name."""
dev_dri = Path("/dev/dri")
if not dev_dri.exists():
return []
return sorted(p for p in dev_dri.iterdir() if p.name.startswith("renderD"))


def lspci_intel_gpus() -> str:
"""Return raw `lspci -nn` output filtered to Intel display devices.

Expand Down
68 changes: 68 additions & 0 deletions src/arc_llama/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import os
import signal
import subprocess
import tempfile
import time
from dataclasses import dataclass
from pathlib import Path
Expand Down Expand Up @@ -162,6 +163,48 @@ def build_plan(
)


def _surface_crash_logs(name: str, log_path: Path | None) -> None:
"""Read the tail of the server's stderr log and emit diagnostic hints.

Called when the subprocess exits before passing the health check. If the
log contains the canonical SYCL "no device" message we print an actionable
checklist so the user knows exactly what to fix without having to grep logs
themselves.
"""
if log_path is None or not log_path.exists():
return
try:
with open(log_path, "rb") as f:
f.seek(0, 2)
size = f.tell()
f.seek(max(0, size - 8192))
tail = f.read().decode("utf-8", errors="replace")
except Exception:
return
lines = [line for line in tail.splitlines() if line.strip()][-40:]
if not lines:
return
log.error("[%s] last output before crash:", name)
for line in lines:
log.error("[%s] %s", name, line)
combined = tail.lower()
if "no device of requested type available" in combined:
log.error(
"[%s] SYCL/level_zero found no compute device. Checklist:\n"
" 1. render nodes present? ls /dev/dri/renderD*\n"
" 2. user in render group? sudo usermod -aG render,video $USER (re-login)\n"
" 3. driver init errors? dmesg | grep -E '(xe|i915|drm)'\n"
" 4. device visible? sycl-ls\n"
" 5. full diagnostics: arc-llama doctor",
name,
)
elif "level_zero" in combined and ("error" in combined or "failed" in combined):
log.error(
"[%s] level_zero adapter error — run `sycl-ls` and `arc-llama doctor`.",
name,
)


class LlamaServer:
"""One llama-server subprocess. Lifecycle: start → wait_ready → stop."""

Expand All @@ -170,6 +213,8 @@ def __init__(self, plan: LaunchPlan, name: str = "llama-server"):
self.name = name
self.process: subprocess.Popen[bytes] | None = None
self.started_at: float | None = None
self._log_path: Path | None = None # path to current stderr/log file
self._is_tmp_log: bool = False # True when _log_path is a temp file

@property
def is_running(self) -> bool:
Expand All @@ -181,11 +226,23 @@ def start(self, log_dir: Path | None = None) -> None:
return
stdout = subprocess.DEVNULL
stderr = subprocess.DEVNULL
_tmp_fh = None # temp file handle to close after Popen
if log_dir is not None:
log_dir.mkdir(parents=True, exist_ok=True)
log_path = log_dir / f"{self.name}.log"
stdout = open(log_path, "ab")
stderr = subprocess.STDOUT
self._log_path = log_path
self._is_tmp_log = False
else:
# Capture stderr to a temp file so wait_ready can surface crash
# messages such as "No device of requested type available".
_tmp_fh = tempfile.NamedTemporaryFile(
prefix=f"arc-llama-{self.name}-", suffix=".log", delete=False
)
stderr = _tmp_fh
self._log_path = Path(_tmp_fh.name)
self._is_tmp_log = True
log.info("[%s] starting: %s", self.name, " ".join(self.plan.argv))
self.process = subprocess.Popen(
self.plan.argv,
Expand All @@ -194,6 +251,9 @@ def start(self, log_dir: Path | None = None) -> None:
stderr=stderr,
preexec_fn=_preexec_isolate_and_pdeathsig,
)
# Parent closes its write copy; child keeps its own inherited fd.
if _tmp_fh is not None:
_tmp_fh.close()
self.started_at = time.time()

async def wait_ready(self, timeout: float = DEFAULT_HEALTH_TIMEOUT) -> bool:
Expand All @@ -202,6 +262,7 @@ async def wait_ready(self, timeout: float = DEFAULT_HEALTH_TIMEOUT) -> bool:
while time.time() < deadline:
if not self.is_running:
log.warning("[%s] process exited before becoming healthy", self.name)
_surface_crash_logs(self.name, self._log_path)
return False
try:
r = await client.get(self.plan.health_url)
Expand Down Expand Up @@ -236,3 +297,10 @@ def stop(self, drain_seconds: float = 3.0) -> None:
pass
self.process = None
self.started_at = None
if self._is_tmp_log and self._log_path is not None:
try:
self._log_path.unlink(missing_ok=True)
except Exception:
pass
self._log_path = None
self._is_tmp_log = False
2 changes: 1 addition & 1 deletion src/arc_llama/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ async def admin_edit_model(name: str, request: Request) -> dict:
try:
ub = int(body["ubatch_size"])
except (TypeError, ValueError):
raise HTTPException(status_code=400, detail="ubatch_size must be an integer")
raise HTTPException(status_code=400, detail="ubatch_size must be an integer") from None
if not (1 <= ub <= 4096):
raise HTTPException(status_code=400, detail="ubatch_size must be 1..4096")
recipe["ubatch_size"] = ub
Expand Down
1 change: 0 additions & 1 deletion tests/test_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ async def test_wait_ready_true_when_healthy(self, monkeypatch: pytest.MonkeyPatc
srv.started_at = 0.0

import httpx
original_get = httpx.AsyncClient.get

async def _fake_get(self, url):
if "/health" in url:
Expand Down
2 changes: 0 additions & 2 deletions tests/test_recipes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
"""Tests for arc_llama.recipes — VRAM math, recipe generation, KV sizing."""
from __future__ import annotations

import pytest

from arc_llama.arch import Arch
from arc_llama.recipes import (
DEFAULT_CTX_CAP,
Expand Down
Loading