diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8c98fb3..39d46bb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,11 +7,12 @@ on: jobs: test: - name: Python ${{ matrix.python-version }} - runs-on: ubuntu-latest + name: Python ${{ matrix.python-version }} on ${{ matrix.os }} + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: + os: [ubuntu-latest, windows-latest] python-version: ["3.10", "3.11", "3.12"] steps: @@ -25,7 +26,7 @@ jobs: cache: pip - name: Install package - run: python -m pip install -e '.[dev]' + run: python -m pip install -e ".[dev]" - name: Lint run: ruff check . diff --git a/pyproject.toml b/pyproject.toml index e7e9ef3..d412fbb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ classifiers = [ "Development Status :: 4 - Beta", "License :: OSI Approved :: MIT License", "Operating System :: POSIX :: Linux", + "Operating System :: Microsoft :: Windows", "Programming Language :: Python :: 3 :: Only", "Topic :: Scientific/Engineering :: Artificial Intelligence", ] diff --git a/src/arc_llama/agent/tools.py b/src/arc_llama/agent/tools.py index 2c13732..a346d7d 100644 --- a/src/arc_llama/agent/tools.py +++ b/src/arc_llama/agent/tools.py @@ -14,6 +14,7 @@ import json import os import subprocess +import sys from dataclasses import dataclass, field from pathlib import Path from typing import Any @@ -391,6 +392,9 @@ def run_command(command: str, root: Path, timeout: float = 60.0) -> ToolResult: f"Error: git history-mutating command '{bad_prefix}' is not allowed.", error=True, ) + env = os.environ.copy() + if sys.platform != "win32": + env.update({"PS1": "", "TERM": "dumb"}) try: result = subprocess.run( command, @@ -399,7 +403,7 @@ def run_command(command: str, root: Path, timeout: float = 60.0) -> ToolResult: capture_output=True, text=True, timeout=timeout, - env={**os.environ, "PS1": "", "TERM": "dumb"}, + env=env, ) except subprocess.TimeoutExpired: return ToolResult( @@ -454,7 
+458,7 @@ def search_files(pattern: str, root: Path, path_glob: str = "*") -> ToolResult: with p.open("r", encoding="utf-8", errors="ignore") as f: for i, line in enumerate(f, start=1): if pattern in line: - rel = p.relative_to(root) + rel = p.relative_to(root).as_posix() matches.append(f"{rel}:{i}: {line.rstrip()}") except OSError: continue diff --git a/src/arc_llama/benchmark.py b/src/arc_llama/benchmark.py index fae87e8..6c72806 100644 --- a/src/arc_llama/benchmark.py +++ b/src/arc_llama/benchmark.py @@ -8,18 +8,15 @@ """ from __future__ import annotations -import json import logging -import statistics import time -from dataclasses import asdict, dataclass, field +from dataclasses import asdict, dataclass from pathlib import Path from typing import Any import httpx -from arc_llama.config import Config, ModelConfig, load_config -from arc_llama.recipes import KVCacheType, default_recipe, suggest_ctx +from arc_llama.config import Config, load_config log = logging.getLogger("arc_llama.benchmark") diff --git a/src/arc_llama/chat_store.py b/src/arc_llama/chat_store.py index 4642033..c360d28 100644 --- a/src/arc_llama/chat_store.py +++ b/src/arc_llama/chat_store.py @@ -27,7 +27,7 @@ def to_dict(self) -> dict[str, Any]: return {"role": self.role, "content": self.content, "timestamp": self.timestamp} @classmethod - def from_dict(cls, data: dict[str, Any]) -> "ChatMessage": + def from_dict(cls, data: dict[str, Any]) -> ChatMessage: return cls( role=data.get("role", ""), content=data.get("content", ""), @@ -55,7 +55,7 @@ def to_dict(self) -> dict[str, Any]: } @classmethod - def from_dict(cls, data: dict[str, Any]) -> "Chat": + def from_dict(cls, data: dict[str, Any]) -> Chat: return cls( id=data.get("id", ""), title=data.get("title", "Untitled chat"), @@ -170,3 +170,46 @@ def wipe(self) -> None: if self.directory.exists(): shutil.rmtree(self.directory) self.directory.mkdir(parents=True, exist_ok=True) + + def export_all(self) -> list[dict[str, Any]]: + """Return every 
stored chat as a list of plain dicts.""" + return [chat.to_dict() for chat in self.list_chats()] + + def import_chats( + self, + data: list[dict[str, Any]], + *, + overwrite: bool = False, + ) -> dict[str, int | list[str]]: + """Import a list of chat dicts. + + Args: + data: list of chat dicts in the format produced by ``Chat.to_dict``. + overwrite: if True, replace an existing chat with the same id. + + Returns: + A summary dict with ``imported``, ``skipped``, and ``errors`` counts. + """ + imported = 0 + skipped = 0 + errors: list[str] = [] + for item in data: + if not isinstance(item, dict): + errors.append("skipped non-dict entry") + continue + chat_id = item.get("id") + if not chat_id: + errors.append("skipped chat with missing id") + continue + path = self._chat_path(chat_id) + if path.exists() and not overwrite: + skipped += 1 + continue + try: + chat = Chat.from_dict(item) + except (TypeError, ValueError) as e: + errors.append(f"{chat_id}: invalid chat data ({e})") + continue + self._save(chat) + imported += 1 + return {"imported": imported, "skipped": skipped, "errors": len(errors), "error_details": errors} diff --git a/src/arc_llama/cli.py b/src/arc_llama/cli.py index b24267c..6ed2429 100644 --- a/src/arc_llama/cli.py +++ b/src/arc_llama/cli.py @@ -17,8 +17,10 @@ """ from __future__ import annotations +import json import logging import os +import platform import shutil import subprocess import sys @@ -46,13 +48,38 @@ console = Console() +_IS_WINDOWS = sys.platform == "win32" + + +class _JsonFormatter(logging.Formatter): + """Emit log records as single-line JSON objects.""" + + def format(self, record: logging.LogRecord) -> str: + obj = { + "timestamp": self.formatTime(record), + "level": record.levelname, + "name": record.name, + "message": record.getMessage(), + } + if record.exc_info: + obj["exception"] = self.formatException(record.exc_info) + return json.dumps(obj, default=str) + def _setup_logging(verbose: bool) -> None: level = logging.DEBUG if 
verbose else logging.INFO - logging.basicConfig( - level=level, - format="%(asctime)s %(levelname)s %(name)s: %(message)s", - ) + if os.environ.get("ARC_LLAMA_LOG_JSON"): + handler = logging.StreamHandler() + handler.setFormatter(_JsonFormatter()) + root = logging.getLogger() + root.setLevel(level) + root.handlers.clear() + root.addHandler(handler) + else: + logging.basicConfig( + level=level, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", + ) def _save_or_die(cfg: Config, path: Path) -> None: @@ -137,8 +164,14 @@ def init( sys.exit(1) gpus = detect_gpus() if not gpus: - console.print("[red]No Intel GPUs detected.[/red]") - console.print("Run [bold]arc-llama doctor[/bold] for a diagnosis.") + if _IS_WINDOWS: + console.print( + "[yellow]No Intel GPUs detected — Windows auto-detection is not " + "supported yet. Create a config manually or run this on WSL.[/yellow]" + ) + else: + console.print("[red]No Intel GPUs detected.[/red]") + console.print("Run [bold]arc-llama doctor[/bold] for a diagnosis.") sys.exit(2) server_path = _resolve_llama_server(llama_server) cfg = init_config_from_detection(gpus, llama_server_path=server_path) @@ -178,12 +211,19 @@ def doctor(ctx: click.Context) -> None: config_path: Path = ctx.obj["config_path"] console.print("[bold]arc-llama doctor[/bold]\n") - # Kernel + driver - console.print(f" kernel: {os.uname().release}") - has_xe = Path("/sys/module/xe").exists() - has_i915 = Path("/sys/module/i915").exists() - console.print(f" xe driver: {'loaded' if has_xe else 'not loaded'}") - console.print(f" i915 driver: {'loaded' if has_i915 else 'not loaded'}") + # Kernel + driver (Linux-only diagnostics) + if _IS_WINDOWS: + console.print(f" platform: Windows {platform.release()}") + console.print( + " [dim]Kernel/driver checks are not available on Windows.[/dim]" + ) + else: + uname = platform.uname() + console.print(f" kernel: {uname.release}") + has_xe = Path("/sys/module/xe").exists() + has_i915 = Path("/sys/module/i915").exists() + 
console.print(f" xe driver: {'loaded' if has_xe else 'not loaded'}") + console.print(f" i915 driver: {'loaded' if has_i915 else 'not loaded'}") # GPU detection (enrich=True so clinfo populates VRAM where xe doesn't via sysfs) gpus = detect_gpus(enrich=True) @@ -214,33 +254,49 @@ def doctor(ctx: click.Context) -> None: path = shutil.which(tool) console.print(f" {tool:<14} {path or '— missing —'}") - # Permissions - console.print("\n user groups:") - try: - out = subprocess.run(["id", "-nG"], capture_output=True, text=True, timeout=2) - groups = out.stdout.split() - except (FileNotFoundError, subprocess.TimeoutExpired): - groups = [] - for needed in ("render", "video"): - ok = needed in groups - marker = "[green]ok[/green]" if ok else "[yellow]missing[/yellow]" - console.print(f" {needed:<14} {marker}") - if "render" not in groups or "video" not in groups: - console.print( - " [yellow]→ add yourself with `sudo usermod -aG render,video $USER` " - "and re-login.[/yellow]" - ) + # Permissions (Linux-only) + if _IS_WINDOWS: + console.print("\n user groups:") + console.print(" [dim]Group checks are not available on Windows.[/dim]") + else: + console.print("\n user groups:") + try: + out = subprocess.run(["id", "-nG"], capture_output=True, text=True, timeout=2) + groups = out.stdout.split() + except (FileNotFoundError, subprocess.TimeoutExpired): + groups = [] + for needed in ("render", "video"): + ok = needed in groups + marker = "[green]ok[/green]" if ok else "[yellow]missing[/yellow]" + console.print(f" {needed:<14} {marker}") + if "render" not in groups or "video" not in groups: + console.print( + " [yellow]→ add yourself with `sudo usermod -aG render,video $USER` " + "and re-login.[/yellow]" + ) # oneAPI - oneapi_setvars = Path("/opt/intel/oneapi/setvars.sh") console.print("\n oneAPI:") - if oneapi_setvars.exists(): - console.print(f" setvars.sh: {oneapi_setvars}") + if _IS_WINDOWS: + oneapi_setvars = Path( + os.environ.get("ProgramFiles(x86)", r"C:\Program Files 
(x86)") + ) / "Intel" / "oneAPI" / "setvars.bat" + if oneapi_setvars.exists(): + console.print(f" setvars.bat: {oneapi_setvars}") + else: + console.print( + " [yellow]Intel oneAPI setvars.bat not found — install Intel " + "oneAPI Base Toolkit if you're building llama.cpp from source.[/yellow]" + ) else: - console.print( - " [yellow]/opt/intel/oneapi/setvars.sh missing — install Intel " - "oneAPI Base Toolkit if you're building llama.cpp from source.[/yellow]" - ) + oneapi_setvars = Path("/opt/intel/oneapi/setvars.sh") + if oneapi_setvars.exists(): + console.print(f" setvars.sh: {oneapi_setvars}") + else: + console.print( + " [yellow]/opt/intel/oneapi/setvars.sh missing — install Intel " + "oneAPI Base Toolkit if you're building llama.cpp from source.[/yellow]" + ) # Config console.print("\n config:") @@ -604,9 +660,14 @@ def _on_signal(signum: int, _frame) -> None: # noqa: ANN001 _shutdown_subprocesses() # Re-raise as default so uvicorn's own handler (or python) finishes the job. _signal.signal(signum, _signal.SIG_DFL) - os.kill(os.getpid(), signum) + if _IS_WINDOWS: + sys.exit(0) + else: + os.kill(os.getpid(), signum) - for s in (_signal.SIGTERM, _signal.SIGINT): + for s in (getattr(_signal, "SIGTERM", None), _signal.SIGINT): + if s is None: + continue try: _signal.signal(s, _on_signal) except (OSError, ValueError): @@ -643,6 +704,9 @@ def mtp_info_cmd(path: Path) -> None: @click.option("--write", is_flag=True, help="Write the unit to ~/.config/systemd/user/") def systemd_unit(service_name: str, description: str, write: bool) -> None: """Print (or write) a systemd --user unit for `arc-llama serve`.""" + if _IS_WINDOWS: + console.print("[red]systemd is not available on Windows.[/red]") + sys.exit(1) arc = shutil.which("arc-llama") if not arc: arc = str(Path(sys.argv[0]).resolve()) diff --git a/src/arc_llama/config.py b/src/arc_llama/config.py index ecf1ff7..e83451e 100644 --- a/src/arc_llama/config.py +++ b/src/arc_llama/config.py @@ -60,14 +60,24 @@ def 
_xdg_config_home() -> Path: + if sys.platform == "win32": + return Path(os.environ.get("APPDATA") or Path.home() / "AppData" / "Roaming") return Path(os.environ.get("XDG_CONFIG_HOME") or Path.home() / ".config") def _xdg_data_home() -> Path: + if sys.platform == "win32": + return Path( + os.environ.get("LOCALAPPDATA") or Path.home() / "AppData" / "Local" + ) return Path(os.environ.get("XDG_DATA_HOME") or Path.home() / ".local" / "share") def _xdg_state_home() -> Path: + if sys.platform == "win32": + return Path( + os.environ.get("LOCALAPPDATA") or Path.home() / "AppData" / "Local" + ) return Path(os.environ.get("XDG_STATE_HOME") or Path.home() / ".local" / "state") @@ -252,12 +262,52 @@ def _strip_none(obj: Any) -> Any: return obj +def migrate_config(raw: dict[str, Any]) -> dict[str, Any]: + """Bump an on-disk config dict to the current schema version. + + Currently a no-op migration (v1 → v1), but the hook exists so future schema + changes can be handled automatically when users upgrade arc-llama. + """ + version = int(raw.get("version", 1)) + if version > CONFIG_VERSION: + raise ValueError( + f"config version {version} is newer than the supported version " + f"{CONFIG_VERSION}; upgrade arc-llama" + ) + raw["version"] = CONFIG_VERSION + # Ensure all top-level sections exist so downstream code can assume them. 
+ raw.setdefault("server", {}) + raw.setdefault("paths", {}) + raw.setdefault("gpus", []) + raw.setdefault("models", []) + raw.setdefault("upstreams", []) + return raw + + +def validate_config(raw: dict[str, Any]) -> None: + """Basic structural validation for a loaded config dict.""" + if not isinstance(raw.get("version"), int): + raise ValueError("config 'version' must be an integer") + if not isinstance(raw.get("server", {}), dict): + raise ValueError("config 'server' must be a table") + if not isinstance(raw.get("paths", {}), dict): + raise ValueError("config 'paths' must be a table") + if not isinstance(raw.get("gpus", []), list): + raise ValueError("config 'gpus' must be an array") + if not isinstance(raw.get("models", []), list): + raise ValueError("config 'models' must be an array") + if not isinstance(raw.get("upstreams", []), list): + raise ValueError("config 'upstreams' must be an array") + + def load_config(path: Path | None = None) -> Config: path = path or default_config_path() if not path.exists(): return Config() with open(path, "rb") as f: raw = _toml_load(f) + raw = migrate_config(raw) + validate_config(raw) return Config( version=int(raw.get("version", CONFIG_VERSION)), server=ServerConfig(**raw.get("server", {})), diff --git a/src/arc_llama/launcher.py b/src/arc_llama/launcher.py index 2025ed8..969bca5 100644 --- a/src/arc_llama/launcher.py +++ b/src/arc_llama/launcher.py @@ -16,6 +16,7 @@ import os import signal import subprocess +import sys import time from dataclasses import dataclass from pathlib import Path @@ -32,6 +33,15 @@ DEFAULT_HEALTH_TIMEOUT = 120 # seconds — generous for cold-start SYCL JIT HEALTH_POLL_INTERVAL = 1.5 +# Log rotation for llama-server subprocess logs. +_MAX_LOG_BYTES = 50 * 1024 * 1024 +_LOG_BACKUPS = 3 + +_IS_WINDOWS = sys.platform == "win32" +# Not defined on POSIX. Fallback lets the Windows code path stay import-safe +# when exercised under tests that monkeypatch _IS_WINDOWS on a Linux runner. 
+_CTRL_BREAK_EVENT = getattr(signal, "CTRL_BREAK_EVENT", signal.SIGTERM) + # Linux prctl(2) constant. We don't import a real binding — one syscall. _PR_SET_PDEATHSIG = 1 @@ -82,6 +92,32 @@ def _preexec_isolate_and_pdeathsig() -> None: pass +def _rotate_log(log_path: Path) -> None: + """Rotate an existing log file so it doesn't grow unbounded. + + Keeps up to ``_LOG_BACKUPS`` historic files (``.log.1``, ``.log.2``, ...). + """ + if not log_path.exists(): + return + try: + if log_path.stat().st_size < _MAX_LOG_BYTES: + return + except OSError: + return + for i in range(_LOG_BACKUPS, 0, -1): + src = log_path.parent / f"{log_path.name}.{i}" + dst = log_path.parent / f"{log_path.name}.{i + 1}" + if src.exists(): + try: + src.replace(dst) + except OSError: + pass + try: + log_path.replace(log_path.parent / f"{log_path.name}.1") + except OSError: + pass + + @dataclass class LaunchPlan: """Everything needed to invoke llama-server for one model.""" @@ -172,6 +208,7 @@ def __init__(self, plan: LaunchPlan, name: str = "llama-server"): self.process: subprocess.Popen[bytes] | None = None self.started_at: float | None = None self._log_file: Any = None # file handle opened in start(), closed in stop() + self._log_path: Path | None = None @property def is_running(self) -> bool: @@ -183,19 +220,32 @@ def start(self, log_dir: Path | None = None) -> None: return stdout = subprocess.DEVNULL stderr = subprocess.DEVNULL + self._log_path = None if log_dir is not None: log_dir.mkdir(parents=True, exist_ok=True) log_path = log_dir / f"{self.name}.log" + _rotate_log(log_path) + self._log_path = log_path self._log_file = open(log_path, "ab") stdout = self._log_file stderr = subprocess.STDOUT log.info("[%s] starting: %s", self.name, " ".join(self.plan.argv)) + popen_kwargs: dict[str, Any] = {} + if _IS_WINDOWS: + # A new process group lets us terminate the whole subtree cleanly + # without Unix-specific killpg. 
The constant is only defined on + # Windows; the getattr guard keeps tests on Linux valid. + popen_kwargs["creationflags"] = getattr( + subprocess, "CREATE_NEW_PROCESS_GROUP", 0 + ) + else: + popen_kwargs["preexec_fn"] = _preexec_isolate_and_pdeathsig self.process = subprocess.Popen( self.plan.argv, env=self.plan.env, stdout=stdout, stderr=stderr, - preexec_fn=_preexec_isolate_and_pdeathsig, + **popen_kwargs, ) self.started_at = time.time() @@ -215,28 +265,65 @@ async def wait_ready(self, timeout: float = DEFAULT_HEALTH_TIMEOUT) -> bool: await asyncio.sleep(HEALTH_POLL_INTERVAL) return False + def tail_log(self, lines: int = 50) -> str: + """Return the last *lines* of the llama-server log, if any.""" + if self._log_path is None: + return "" + try: + text = self._log_path.read_text(encoding="utf-8", errors="replace") + except OSError: + return "" + all_lines = text.splitlines() + return "\n".join(all_lines[-lines:]) + def stop(self, drain_seconds: float = 3.0) -> None: if not self.is_running: return proc = self.process assert proc is not None log.info("[%s] stopping pid=%s", self.name, proc.pid) - try: - os.killpg(proc.pid, signal.SIGTERM) - except ProcessLookupError: - pass - try: - proc.wait(timeout=drain_seconds) - except subprocess.TimeoutExpired: - log.warning("[%s] SIGTERM timed out, sending SIGKILL", self.name) + if _IS_WINDOWS: + # proc.terminate()/kill() both just call TerminateProcess on Windows — + # there's no graceful/forceful distinction, and neither touches child + # processes. CTRL_BREAK_EVENT goes to the whole CREATE_NEW_PROCESS_GROUP + # (the closest equivalent to SIGTERM here); taskkill /T kills the whole + # subtree, mirroring killpg on the Linux side below. 
try: - os.killpg(proc.pid, signal.SIGKILL) - except ProcessLookupError: + proc.send_signal(_CTRL_BREAK_EVENT) + except (ProcessLookupError, OSError): pass try: proc.wait(timeout=drain_seconds) except subprocess.TimeoutExpired: + log.warning( + "[%s] CTRL_BREAK timed out, force-killing process tree", self.name + ) + subprocess.run( + ["taskkill", "/F", "/T", "/PID", str(proc.pid)], + capture_output=True, + timeout=drain_seconds, + ) + try: + proc.wait(timeout=drain_seconds) + except subprocess.TimeoutExpired: + pass + else: + try: + os.killpg(proc.pid, signal.SIGTERM) + except ProcessLookupError: pass + try: + proc.wait(timeout=drain_seconds) + except subprocess.TimeoutExpired: + log.warning("[%s] SIGTERM timed out, sending SIGKILL", self.name) + try: + os.killpg(proc.pid, signal.SIGKILL) + except ProcessLookupError: + pass + try: + proc.wait(timeout=drain_seconds) + except subprocess.TimeoutExpired: + pass self.process = None self.started_at = None if self._log_file is not None: diff --git a/src/arc_llama/router.py b/src/arc_llama/router.py index 64cdb30..cabf635 100644 --- a/src/arc_llama/router.py +++ b/src/arc_llama/router.py @@ -15,13 +15,38 @@ import asyncio import logging +import time from pathlib import Path +from typing import Any from arc_llama.config import Config, GPUConfig, ModelConfig from arc_llama.launcher import LlamaServer, build_plan +from arc_llama.recipes import KVCacheType, estimate_kv_bytes log = logging.getLogger("arc_llama.router") +# Rough overhead budgets for VRAM estimation (MiB). +_VRAM_COMPUTE_BUFFER_MB = 768 +_VRAM_SAFETY_MARGIN_MB = 256 + + +def _estimate_model_vram_mb(model: ModelConfig) -> int: + """Rough VRAM footprint for one model instance. + + Includes the mapped weights (model file size), the KV cache at the + configured context/type, and a fixed compute-buffer + safety margin. 
+ """ + path = Path(model.path) + try: + model_file_mb = path.stat().st_size // (1_048_576) + except OSError: + model_file_mb = 0 + recipe = model.recipe or {} + ctx = int(recipe.get("ctx", 8192)) + kv_type = KVCacheType(recipe.get("cache_type_k", "f16")) + kv_mb = estimate_kv_bytes(ctx, kv_type, model.kv_class) // (1_048_576) + return model_file_mb + kv_mb + _VRAM_COMPUTE_BUFFER_MB + _VRAM_SAFETY_MARGIN_MB + class Router: """Owns one LlamaServer per registered model and serialises swaps.""" @@ -32,6 +57,13 @@ def __init__(self, cfg: Config, log_dir: Path | None = None): self._servers: dict[str, LlamaServer] = {} # keyed by model.name self._lock = asyncio.Lock() self._loading_futures: dict[str, asyncio.Future[tuple[ModelConfig, LlamaServer]]] = {} + self.metrics: dict[str, Any] = { + "loads": 0, + "stops": 0, + "load_errors": 0, + "last_load_at": None, + "last_error": None, + } self._build_servers() def _build_servers(self) -> None: @@ -119,6 +151,8 @@ async def ensure_active(self, query: str) -> tuple[ModelConfig, LlamaServer]: if target_srv.is_running: return target_model, target_srv + self._check_vram_fit(target_model, target_gpu) + # We are the one responsible for starting. 
loop = asyncio.get_running_loop() future: asyncio.Future[tuple[ModelConfig, LlamaServer]] = loop.create_future() @@ -127,25 +161,59 @@ async def ensure_active(self, query: str) -> tuple[ModelConfig, LlamaServer]: target_srv.start(log_dir=self.log_dir) ready = await target_srv.wait_ready() if not ready: + tail = target_srv.tail_log(lines=40) log.error( "model %s failed health-check; stopping it", target_model.name, ) target_srv.stop() - raise RuntimeError( - f"llama-server for {target_model.name} did not become healthy" - ) + self.metrics["load_errors"] += 1 + self.metrics["last_error"] = f"{target_model.name} did not become healthy" + detail = f"llama-server for {target_model.name} did not become healthy" + if tail: + detail += "\n\n--- last log lines ---\n" + tail + raise RuntimeError(detail) + self.metrics["loads"] += 1 + self.metrics["last_load_at"] = time.time() + self.metrics["last_error"] = None result = (target_model, target_srv) future.set_result(result) return result - except Exception: - future.set_exception(RuntimeError( - f"llama-server for {target_model.name} did not become healthy" - )) + except Exception as exc: + if not future.done(): + self.metrics["load_errors"] += 1 + self.metrics["last_error"] = str(exc) + future.set_exception(RuntimeError( + f"llama-server for {target_model.name} did not become healthy" + )) raise finally: self._loading_futures.pop(target_model.name, None) + def _check_vram_fit(self, target: ModelConfig, target_gpu: GPUConfig) -> None: + """Refuse to load *target* if its estimated VRAM won't fit on target_gpu. + + In multi-resident mode this also accounts for other loaded models that + share the same GPU. 
+ """ + if not target_gpu.vram_mb: + return + used_mb = _estimate_model_vram_mb(target) + for name, srv in self._servers.items(): + if name == target.name or not srv.is_running: + continue + other = next((m for m in self.cfg.models if m.name == name), None) + if other is None or other.gpu_pci_slot != target_gpu.pci_slot: + continue + used_mb += _estimate_model_vram_mb(other) + if used_mb > target_gpu.vram_mb: + target_mb = _estimate_model_vram_mb(target) + raise RuntimeError( + f"model {target.name!r} needs ~{target_mb} MiB on GPU " + f"{target_gpu.pci_slot} but only {target_gpu.vram_mb} MiB is available " + f"(estimated total with co-residents: {used_mb} MiB)" + ) + async def _evict_for(self, target: ModelConfig, target_gpu: GPUConfig) -> None: """Stop the right neighbours so the target can have its GPU.""" single = self.cfg.server.single_resident @@ -169,6 +237,7 @@ async def stop_one(self, name: str) -> bool: if srv is None or not srv.is_running: return False srv.stop() + self.metrics["stops"] += 1 return True async def stop_all(self) -> int: @@ -179,6 +248,7 @@ async def stop_all(self) -> int: if srv.is_running: srv.stop() stopped += 1 + self.metrics["stops"] += stopped return stopped async def rebuild_model(self, name: str) -> tuple[bool, bool]: diff --git a/src/arc_llama/server.py b/src/arc_llama/server.py index f902385..996087b 100644 --- a/src/arc_llama/server.py +++ b/src/arc_llama/server.py @@ -20,6 +20,7 @@ import io import json import logging +import time import uuid from collections.abc import AsyncIterator from contextlib import asynccontextmanager @@ -62,6 +63,7 @@ async def lifespan(app: FastAPI): app.state.router = Router(cfg, log_dir=state_dir) app.state.upstream_mgr = UpstreamManager(cfg.upstreams) app.state.cfg = cfg + app.state.started_at = time.time() app.state.pending_confirmations: dict[str, tuple[asyncio.Event, dict[str, bool]]] = {} if state_dir: app.state.chat_store = ChatStore(state_dir / "chats") @@ -76,8 +78,44 @@ async def 
lifespan(app: FastAPI): app = FastAPI(title="arc-llama", version="0.1.0", lifespan=lifespan) @app.get("/health") - async def health() -> dict[str, str]: - return {"status": "ok"} + async def health(request: Request) -> dict[str, Any]: + """Liveness probe for the arc-llama router itself.""" + rt: Router = request.app.state.router + uptime = time.time() - request.app.state.started_at + loaded = [m.name for m in rt.all_models() if rt._servers.get(m.name) and rt._servers[m.name].is_running] + return { + "status": "ok", + "uptime_seconds": round(uptime, 2), + "loaded_models": loaded, + "loaded_model_count": len(loaded), + } + + @app.get("/admin/metrics") + async def admin_metrics(request: Request) -> dict[str, Any]: + """Operational counters and current GPU/model state.""" + rt: Router = request.app.state.router + c: Config = request.app.state.cfg + uptime = time.time() - request.app.state.started_at + loaded = [m.name for m in rt.all_models() if rt._servers.get(m.name) and rt._servers[m.name].is_running] + return { + "uptime_seconds": round(uptime, 2), + "loads": rt.metrics["loads"], + "stops": rt.metrics["stops"], + "load_errors": rt.metrics["load_errors"], + "last_load_at": rt.metrics["last_load_at"], + "last_error": rt.metrics["last_error"], + "active_models": loaded, + "gpus": [ + { + "pci_slot": g.pci_slot, + "name": g.name, + "arch": g.arch, + "vram_mb": g.vram_mb, + "enabled": g.enabled, + } + for g in c.gpus + ], + } @app.get("/v1/models") async def list_models(request: Request) -> dict: @@ -262,6 +300,65 @@ async def create_chat(request: Request) -> dict[str, Any]: raise HTTPException(status_code=409, detail=f"Chat already exists: {chat_id}") from None return chat.to_dict() + @app.post("/v1/chats/search") + async def search_chats(request: Request) -> dict[str, Any]: + """Search chat titles and messages. + + Body: {"query": "string", "limit": 20} + Returns matching chats with the indices of matching messages. 
+ """ + try: + body = await request.json() + except json.JSONDecodeError as e: + raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e + query = body.get("query", "") + if not query: + raise HTTPException(status_code=400, detail="query is required") + limit = int(body.get("limit", 20)) + store: ChatStore = request.app.state.chat_store + results = store.search(query, limit=limit) + return { + "object": "list", + "data": [ + { + "chat": chat.summary(), + "matching_message_indices": indices, + } + for chat, indices in results + ], + } + + @app.get("/v1/chats/export") + async def export_chats(request: Request) -> dict[str, Any]: + """Export every chat as a portable JSON document.""" + store: ChatStore = request.app.state.chat_store + return {"version": 1, "exported_at": time.time(), "chats": store.export_all()} + + @app.post("/v1/chats/import") + async def import_chats(request: Request) -> dict[str, Any]: + """Import chats from an export document. + + Body: {"chats": [...], "overwrite": false} + Existing chats are skipped unless ``overwrite`` is true. 
+ """ + try: + body = await request.json() + except json.JSONDecodeError as e: + raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e + if not isinstance(body, dict): + raise HTTPException(status_code=400, detail="Body must be a JSON object") + chats = body.get("chats") + if not isinstance(chats, list): + raise HTTPException(status_code=400, detail="'chats' must be an array") + store: ChatStore = request.app.state.chat_store + result = store.import_chats(chats, overwrite=bool(body.get("overwrite", False))) + return { + "imported": result["imported"], + "skipped": result["skipped"], + "errors": result["errors"], + "error_details": result["error_details"], + } + @app.get("/v1/chats/{chat_id}") async def get_chat(chat_id: str, request: Request) -> dict[str, Any]: """Return a full chat including all messages.""" @@ -332,34 +429,6 @@ async def delete_chat(chat_id: str, request: Request) -> dict[str, Any]: raise HTTPException(status_code=404, detail="Chat not found") return {"deleted": True} - @app.post("/v1/chats/search") - async def search_chats(request: Request) -> dict[str, Any]: - """Search chat titles and messages. - - Body: {"query": "string", "limit": 20} - Returns matching chats with the indices of matching messages. 
- """ - try: - body = await request.json() - except json.JSONDecodeError as e: - raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e - query = body.get("query", "") - if not query: - raise HTTPException(status_code=400, detail="query is required") - limit = int(body.get("limit", 20)) - store: ChatStore = request.app.state.chat_store - results = store.search(query, limit=limit) - return { - "object": "list", - "data": [ - { - "chat": chat.summary(), - "matching_message_indices": indices, - } - for chat, indices in results - ], - } - # ------------------------------------------------------------------ # Admin (used by the web UI / TUI) # ------------------------------------------------------------------ diff --git a/src/arc_llama/static/chat.css b/src/arc_llama/static/chat.css new file mode 100644 index 0000000..965293e --- /dev/null +++ b/src/arc_llama/static/chat.css @@ -0,0 +1,1637 @@ +:root { + --bg: #0a0c10; + --bg-elev: #11141a; + --bg-code: #080a0d; + --fg: #e6edf3; + --fg-dim: #8b949e; + --fg-mute: #5f6670; + --accent: #58a6ff; + --accent-deep: #1f6feb; + --accent-bright: #79c0ff; + --border: #30363d; + --border-subtle: #21262d; + --success: #3fb950; + --warn: #d29922; + --error: #f85149; + + --radius-sm: 4px; + --radius-md: 6px; + --radius-lg: 8px; + --space-unit: 4px; + --shadow-sm: 0 1px 2px rgba(0, 0, 0, 0.24); + --shadow-md: 0 4px 12px rgba(0, 0, 0, 0.32); + --shadow-lg: 0 8px 24px rgba(0, 0, 0, 0.35); + --transition-fast: 0.12s ease; + --transition-base: 0.18s ease; + + /* Legacy aliases consumed by existing JS/CSS */ + --glass: var(--bg-elev); + --glass-border: var(--border-subtle); + --user-tint: #151821; + --accent-gradient: linear-gradient(90deg, var(--accent-deep), var(--accent) 55%, var(--accent-bright)); + --radius: var(--radius-lg); + --shadow: var(--shadow-md); +} + +* { box-sizing: border-box; } + +html, body { + margin: 0; + padding: 0; + height: 100%; +} + +body { + background: var(--bg); + color: var(--fg); + 
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, "Helvetica Neue", Arial, sans-serif; + font-size: 14px; + line-height: 1.5; + display: flex; + flex-direction: column; + align-items: center; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +a { color: var(--fg-dim); text-decoration: none; transition: color var(--transition-fast); } +a:hover { color: var(--accent-bright); } + +::-webkit-scrollbar { width: 8px; height: 8px; } +::-webkit-scrollbar-track { background: transparent; } +::-webkit-scrollbar-thumb { background: rgba(255, 255, 255, 0.10); border-radius: 4px; } +::-webkit-scrollbar-thumb:hover { background: rgba(255, 255, 255, 0.16); } + +/* ------------------------------------------------------------------ + Header + ------------------------------------------------------------------ */ +header { + width: 100%; + max-width: 900px; + padding: calc(var(--space-unit) * 4) calc(var(--space-unit) * 6); + display: flex; + align-items: center; + justify-content: space-between; + gap: calc(var(--space-unit) * 4); + background: var(--bg-elev); + border-bottom: 1px solid var(--border-subtle); + box-shadow: var(--shadow-sm); +} + +.brand { + display: flex; + align-items: baseline; + gap: calc(var(--space-unit) * 3); +} + +header h1 { + margin: 0; + font-size: 18px; + font-weight: 700; + letter-spacing: -0.02em; + color: var(--fg); +} + +.tagline { + font-size: 12px; + color: var(--fg-mute); + font-weight: 500; +} + +.meta { + display: flex; + align-items: center; + gap: calc(var(--space-unit) * 3); +} + +.mode-toggle { + display: inline-flex; + align-items: center; + background: var(--bg-code); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-md); + padding: 3px; + gap: 2px; +} + +.mode-toggle button { + appearance: none; + background: transparent; + border: 1px solid transparent; + border-radius: var(--radius-sm); + color: var(--fg-dim); + font: inherit; + font-size: 12px; + font-weight: 600; 
+ padding: 5px 12px; + cursor: pointer; + transition: background var(--transition-fast), color var(--transition-fast), border-color var(--transition-fast), box-shadow var(--transition-fast); +} + +.mode-toggle button:hover { color: var(--fg); } + +.mode-toggle button.active { + background: var(--accent); + color: #fff; + border-color: var(--accent); + box-shadow: var(--shadow-sm); +} + +.manager-link { + display: inline-flex; + align-items: center; + gap: 4px; + font-size: 12px; + font-weight: 600; + color: var(--fg-dim); + padding: 6px 10px; + border-radius: var(--radius-md); + border: 1px solid var(--border-subtle); + background: var(--bg); + transition: color var(--transition-fast), background var(--transition-fast), border-color var(--transition-fast); +} + +.manager-link:hover { + color: var(--accent-bright); + background: rgba(88, 166, 255, 0.08); + border-color: rgba(88, 166, 255, 0.25); +} + +/* ------------------------------------------------------------------ + Model bar + ------------------------------------------------------------------ */ +.model-bar { + width: 100%; + max-width: 900px; + padding: calc(var(--space-unit) * 3) calc(var(--space-unit) * 6) calc(var(--space-unit) * 4); + display: flex; + align-items: center; + gap: 12px; +} + +.model-select { + appearance: none; + background: var(--bg-elev) url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='10' height='10' viewBox='0 0 12 12'%3E%3Cpath fill='%238b949e' d='M6 8L1 3h10z'/%3E%3C/svg%3E") no-repeat right 11px center; + color: var(--fg); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-md); + padding: 7px 30px 7px 12px; + font: inherit; + font-size: 13px; + font-weight: 500; + cursor: pointer; + min-width: 260px; + transition: border-color var(--transition-fast), box-shadow var(--transition-fast), background-color var(--transition-fast); +} + +.model-select:hover { border-color: var(--border); } + +.model-select:focus { + outline: none; + 
border-color: var(--accent); + box-shadow: 0 0 0 2px rgba(88, 166, 255, 0.15); +} + +.model-select option { background: var(--bg-elev); color: var(--fg); } + +.model-status { + display: inline-flex; + align-items: center; + gap: 6px; + padding: 3px 10px; + border-radius: 999px; + font-size: 11px; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.06em; + border: 1px solid transparent; + line-height: 1.2; + color: var(--fg-mute); + background: rgba(255, 255, 255, 0.04); + border-color: var(--border); + transition: color 0.2s, background 0.2s, border-color 0.2s; +} + +.model-status .indicator { + width: 6px; + height: 6px; + border-radius: 50%; + background: currentColor; + opacity: 0.85; +} + +.model-status .name { font-weight: 600; color: inherit; } + +.model-status.ready { + color: var(--success); + background: rgba(63, 185, 80, 0.10); + border-color: rgba(63, 185, 80, 0.25); +} + +.model-status.loading { + color: var(--accent-bright); + background: rgba(88, 166, 255, 0.10); + border-color: rgba(88, 166, 255, 0.28); +} + +.model-status.swapping { + color: var(--warn); + background: rgba(210, 153, 34, 0.10); + border-color: rgba(210, 153, 34, 0.28); +} + +.model-status.loading .indicator, +.model-status.swapping .indicator { animation: pulse 1.2s infinite; } + +@keyframes pulse { + 0%, 100% { opacity: 0.4; } + 50% { opacity: 1; } +} + +@keyframes spin { + to { transform: rotate(360deg); } +} + +/* ------------------------------------------------------------------ + Main chat log + ------------------------------------------------------------------ */ +main { + flex: 1; + width: 100%; + max-width: 900px; + overflow-y: auto; + padding: 16px 24px; + display: flex; + flex-direction: column; + gap: 16px; +} + +.empty-state { + margin: auto; + padding: calc(var(--space-unit) * 12) calc(var(--space-unit) * 6); + text-align: center; + color: var(--fg-mute); + font-size: 13px; + display: flex; + flex-direction: column; + align-items: center; + gap: 
calc(var(--space-unit) * 2); + max-width: 360px; +} + +.empty-state .icon { + width: 40px; + height: 40px; + border-radius: var(--radius-lg); + background: rgba(255, 255, 255, 0.03); + border: 1px solid var(--border-subtle); + display: flex; + align-items: center; + justify-content: center; + font-size: 18px; + margin-bottom: calc(var(--space-unit) * 2); + color: var(--fg-dim); +} + +.empty-state .icon svg { fill: currentColor; } + +.empty-state .title { + color: var(--fg-dim); + font-weight: 600; + font-size: 14px; +} + +.empty-state .hint { + color: var(--fg-mute); + font-size: 12px; + line-height: 1.5; +} + +.message { + background: var(--bg-elev); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-lg); + padding: 18px; + max-width: 100%; + box-shadow: var(--shadow-sm); + animation: msg-in 0.2s ease; +} + +@keyframes msg-in { + from { opacity: 0; transform: translateY(6px); } + to { opacity: 1; transform: translateY(0); } +} + +.message.user { + background: rgba(88, 166, 255, 0.06); + border-left: 3px solid var(--accent); + align-self: flex-end; + max-width: 82%; +} + +.message.assistant { + border-left: 3px solid var(--accent-bright); +} + +.message.system { + background: rgba(88, 166, 255, 0.05); + border-color: rgba(88, 166, 255, 0.15); + align-self: center; + max-width: 92%; + font-size: 13px; +} + +.message.error-card { + background: rgba(248, 81, 73, 0.06); + border-color: rgba(248, 81, 73, 0.2); + color: #ffb3ae; +} + +.message .role { + font-size: 11px; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.08em; + color: var(--fg-mute); + margin-bottom: 10px; + display: flex; + align-items: center; + gap: 8px; +} + +.message.assistant .role { color: var(--accent-bright); } +.message.user .role { color: var(--fg-dim); } +.message.system .role { color: var(--accent-bright); } +.message.error-card .role { color: var(--error); } + +.message .content { + word-wrap: break-word; +} + +.message.user .content, +.message.system 
.content, +.message.error-card .content { + white-space: pre-wrap; +} + +.message.assistant .content { + font-size: 14px; + line-height: 1.65; +} + +.message.assistant .content > *:last-child { + margin-bottom: 0; +} + +.message.user .content { + font-weight: 400; + font-size: 14px; + line-height: 1.55; +} + +.token-chunk { + opacity: 0.5; + transition: opacity 0.12s ease; +} +.token-chunk.revealed { opacity: 1; } + +.message pre { + background: var(--bg-code); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-md); + padding: 14px 16px; + overflow-x: auto; + margin: 14px 0; +} + +.message pre code { + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; + font-size: 13px; + line-height: 1.6; + color: #c9d1d9; + background: transparent; + padding: 0; +} + +.code-block-wrapper { + position: relative; + margin: 14px 0; + border: 1px solid var(--border-subtle); + border-radius: var(--radius-md); + background: var(--bg-code); + overflow: hidden; +} + +.code-block-wrapper .code-lang { + position: absolute; + top: 6px; + left: 12px; + font-size: 11px; + font-weight: 600; + color: var(--fg-mute); + text-transform: uppercase; + letter-spacing: 0.04em; + pointer-events: none; +} + +.code-block-wrapper pre { + margin: 0; + border: none; + border-radius: 0; + padding: 34px 16px 14px; + background: transparent; +} + +.code-block-wrapper .copy-code-btn { + position: absolute; + top: 6px; + right: 6px; + display: inline-flex; + align-items: center; + justify-content: center; + width: 28px; + height: 28px; + padding: 0; + background: rgba(255, 255, 255, 0.06); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-sm); + color: var(--fg-dim); + cursor: pointer; + opacity: 0; + transition: opacity var(--transition-fast), background var(--transition-fast), border-color var(--transition-fast), color var(--transition-fast); +} + +.code-block-wrapper:hover .copy-code-btn { opacity: 1; } + 
+.code-block-wrapper .copy-code-btn:hover { + background: rgba(88, 166, 255, 0.12); + border-color: rgba(88, 166, 255, 0.35); + color: var(--accent-bright); +} + +.code-block-wrapper .copy-code-btn.copied { + background: rgba(63, 185, 80, 0.15); + border-color: rgba(63, 185, 80, 0.4); + color: var(--success); + opacity: 1; +} + +.message code { + background: rgba(230, 237, 243, 0.08); + padding: 2px 5px; + border-radius: var(--radius-sm); + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; + font-size: 12px; + color: var(--accent-bright); +} + +.message p { margin: 0 0 10px; } +.message p:last-child { margin-bottom: 0; } + +.message h1, .message h2, .message h3, .message h4, .message h5, .message h6 { + margin: 18px 0 10px; + font-weight: 700; + line-height: 1.25; + color: var(--fg); +} + +.message h1 { font-size: 18px; } +.message h2 { font-size: 16px; } +.message h3 { font-size: 15px; } +.message h4 { font-size: 14px; } +.message h5 { font-size: 13px; } +.message h6 { font-size: 12px; color: var(--fg-dim); } + +.message blockquote { + margin: 12px 0; + padding: 10px 16px; + border-left: 2px solid var(--accent); + background: rgba(88, 166, 255, 0.05); + border-radius: 0 var(--radius-sm) var(--radius-sm) 0; + color: var(--fg-dim); +} + +.message ul, .message ol { + margin: 10px 0; + padding-left: 24px; +} + +.message li { margin: 4px 0; } +.message del { text-decoration: line-through; opacity: 0.55; } + +.message hr { + border: none; + height: 1px; + background: var(--border-subtle); + margin: 16px 0; +} + +.message a { + color: var(--accent-bright); + text-decoration: underline; + text-underline-offset: 2px; +} + +.message a:hover { color: var(--accent); } + +.thinking-block { + margin-bottom: 14px; + border: 1px solid var(--border-subtle); + border-radius: var(--radius-md); + overflow: hidden; + background: var(--bg-code); +} + +.thinking-toggle { + display: flex; + align-items: center; + gap: 8px; + 
padding: 8px 12px; + background: rgba(88, 166, 255, 0.06); + cursor: pointer; + user-select: none; + font-size: 11px; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--fg-dim); + transition: background var(--transition-fast); +} + +.thinking-toggle:hover { background: rgba(88, 166, 255, 0.10); } + +.thinking-toggle .chevron { + transition: transform var(--transition-fast); + font-size: 9px; + display: inline-block; + color: var(--accent-bright); +} + +.thinking-toggle.open .chevron { transform: rotate(90deg); } + +.thinking-content { + font-size: 12px; + color: var(--fg-dim); + font-style: italic; + line-height: 1.55; + white-space: pre-wrap; + max-height: 0; + overflow: hidden; + transition: max-height 0.25s ease, padding 0.25s ease; + padding: 0 12px; +} + +.thinking-content.open { + max-height: 2000px; + padding: 10px 12px; +} + +/* ------------------------------------------------------------------ + Input area + ------------------------------------------------------------------ */ +.input-area { + width: 100%; + max-width: 900px; + padding: 12px 24px 24px; +} + +.input-wrap { + position: relative; + display: flex; + align-items: flex-end; + gap: 8px; + background: var(--bg-elev); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-lg); + padding: 10px 12px 10px 14px; + box-shadow: var(--shadow-sm); + transition: border-color var(--transition-base), box-shadow var(--transition-base); +} + +.input-wrap:focus-within, +.input-wrap.generating { + border-color: var(--accent); + box-shadow: 0 0 0 3px rgba(88, 166, 255, 0.12), var(--shadow-md); +} + +#message-input { + flex: 1; + background: transparent; + border: none; + color: var(--fg); + font: inherit; + font-size: 14px; + line-height: 1.5; + resize: none; + outline: none; + min-height: 24px; + max-height: 96px; + padding: 7px 0; + field-sizing: content; +} + +#message-input::placeholder { color: var(--fg-mute); } + +#send-button { + flex: 0 0 auto; + 
width: 34px; + height: 34px; + border-radius: var(--radius-sm); + border: none; + background: var(--accent-deep); + color: #fff; + display: flex; + align-items: center; + justify-content: center; + cursor: pointer; + transition: background var(--transition-fast), transform 0.1s, box-shadow var(--transition-fast); +} + +#send-button:hover { background: var(--accent); } +#send-button:active { transform: translateY(1px); } +#send-button:disabled { opacity: 0.45; cursor: not-allowed; transform: none; } +#send-button svg { width: 16px; height: 16px; fill: currentColor; } + +#attach-button { + flex: 0 0 auto; + width: 34px; + height: 34px; + border-radius: var(--radius-sm); + border: 1px solid transparent; + background: transparent; + color: var(--fg-mute); + display: flex; + align-items: center; + justify-content: center; + cursor: pointer; + transition: color var(--transition-fast), background var(--transition-fast), border-color var(--transition-fast); +} + +#attach-button:hover { + color: var(--accent-bright); + background: rgba(88, 166, 255, 0.08); + border-color: rgba(88, 166, 255, 0.15); +} + +#attach-button:disabled { opacity: 0.45; cursor: not-allowed; } +#attach-button svg { width: 18px; height: 18px; fill: currentColor; } + +#pdf-input { display: none; } + +#attachment-strip { + display: flex; + align-items: center; + gap: 8px; + flex: 0 1 auto; + min-width: 0; + overflow-x: auto; + padding-right: 4px; + scrollbar-width: none; +} + +#attachment-strip::-webkit-scrollbar { display: none; } + +.attachment-chip { + flex: 0 0 auto; + display: inline-flex; + align-items: center; + gap: 6px; + padding: 5px 9px; + border-radius: var(--radius-sm); + background: rgba(88, 166, 255, 0.10); + border: 1px solid rgba(88, 166, 255, 0.2); + color: var(--fg-dim); + font-size: 12px; + max-width: 180px; + transition: opacity var(--transition-base), border-color var(--transition-fast), background var(--transition-fast); +} + +.attachment-chip:hover { border-color: rgba(88, 166, 
255, 0.35); } + +.attachment-chip.error { + background: rgba(248, 81, 73, 0.08); + border-color: rgba(248, 81, 73, 0.2); + color: #ffb3ae; +} + +.attachment-chip.error:hover { border-color: rgba(248, 81, 73, 0.35); } + +.attachment-chip.processing { opacity: 0.7; } + +.attachment-chip .filename { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + direction: rtl; +} + +.attachment-chip .remove { + flex: 0 0 auto; + width: 16px; + height: 16px; + border: none; + border-radius: var(--radius-sm); + background: transparent; + color: inherit; + cursor: pointer; + display: flex; + align-items: center; + justify-content: center; + font-size: 12px; + line-height: 1; + opacity: 0.7; + transition: opacity var(--transition-fast), background var(--transition-fast); +} + +.attachment-chip .remove:hover { opacity: 1; background: rgba(255, 255, 255, 0.08); } + +.attachment-chip .remove svg { fill: currentColor; } + +.attachment-chip .attachment-icon { + flex: 0 0 auto; + display: inline-flex; + align-items: center; + justify-content: center; + color: var(--accent-bright); +} + +.attachment-chip .attachment-icon svg { fill: currentColor; } + +.attachment-chip .spinner { + width: 12px; + height: 12px; + border: 2px solid currentColor; + border-top-color: transparent; + border-radius: 50%; + animation: spin 1s linear infinite; + opacity: 0.7; +} + +.hint { + text-align: center; + font-size: 11px; + color: var(--fg-mute); + margin-top: 8px; +} + +/* ------------------------------------------------------------------ + Context meter + ------------------------------------------------------------------ */ +.ctx-meter { + width: 100%; + max-width: 900px; + padding: 0 24px 12px; + opacity: 0; + transition: opacity 0.4s ease; +} + +.ctx-meter.visible { opacity: 1; } + +.ctx-bar-track { + height: 4px; + background: var(--border-subtle); + border-radius: 999px; + overflow: hidden; + margin-bottom: 6px; +} + +.ctx-bar-fill { + height: 100%; + border-radius: 999px; + 
background: var(--accent-gradient); + width: 0%; + transition: width 0.6s cubic-bezier(0.4, 0, 0.2, 1), filter 0.4s ease; +} + +.ctx-bar-fill.warn { filter: brightness(1.25); } +.ctx-bar-fill.critical { filter: brightness(1.6) saturate(0.5); } + +.ctx-label { + display: flex; + justify-content: space-between; + font-size: 11px; + color: var(--fg-mute); +} + +.ctx-tps { color: var(--fg-dim); } + +/* ------------------------------------------------------------------ + Side-panel toggles + ------------------------------------------------------------------ */ +#settings-toggle, #history-toggle { + position: fixed; + bottom: 20px; + width: 40px; + height: 40px; + border-radius: var(--radius-md); + border: 1px solid var(--border-subtle); + background: var(--bg-elev); + color: var(--fg-mute); + cursor: pointer; + display: flex; + align-items: center; + justify-content: center; + transition: color var(--transition-fast), border-color var(--transition-fast), background var(--transition-fast), transform var(--transition-base); + z-index: 100; + box-shadow: var(--shadow-sm); +} + +#history-toggle { left: 20px; } +#settings-toggle { right: 20px; } + +#settings-toggle:hover, #history-toggle:hover { + color: var(--accent-bright); + border-color: rgba(88, 166, 255, 0.25); + background: var(--bg); +} + +#settings-toggle.open { transform: rotate(45deg); color: var(--accent-bright); border-color: rgba(88, 166, 255, 0.25); } +#history-toggle.open { color: var(--accent-bright); border-color: rgba(88, 166, 255, 0.25); } + +/* ------------------------------------------------------------------ + Side panels + ------------------------------------------------------------------ */ +#settings-panel, #history-panel { + position: fixed; + top: 0; + bottom: 0; + width: 300px; + background: var(--bg); + padding: 24px; + transform: translateX(100%); + transition: transform 0.25s ease; + z-index: 99; + overflow-y: auto; + display: flex; + flex-direction: column; + gap: 16px; + box-shadow: 
var(--shadow-lg); +} + +#settings-panel { right: 0; border-left: 1px solid var(--border-subtle); } +#history-panel { left: 0; border-right: 1px solid var(--border-subtle); transform: translateX(-100%); } +#settings-panel.open { transform: translateX(0); } +#history-panel.open { transform: translateX(0); } + +.s-title, .h-title { + font-size: 11px; + font-weight: 700; + color: var(--fg-mute); + text-transform: uppercase; + letter-spacing: 0.08em; +} + +.s-model-name { + font-size: 13px; + font-weight: 600; + color: var(--accent-bright); + word-break: break-all; + margin-top: 4px; +} + +.s-field { display: flex; flex-direction: column; gap: 6px; } + +.s-field label { + font-size: 10px; + color: var(--fg-mute); + text-transform: uppercase; + letter-spacing: 0.06em; + font-weight: 700; +} + +.s-field input, +.s-field select { + background: var(--bg-elev); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-md); + color: var(--fg); + font: inherit; + font-size: 13px; + padding: 7px 10px; + transition: border-color var(--transition-fast), box-shadow var(--transition-fast), background var(--transition-fast); +} + +.s-field input:hover, +.s-field select:hover { border-color: var(--border); } + +.s-field input:focus, +.s-field select:focus { + outline: none; + border-color: var(--accent); + box-shadow: 0 0 0 2px rgba(88, 166, 255, 0.12); +} + +.s-field select option { background: var(--bg-elev); } + +.s-apply { + width: 100%; + padding: 9px; + border-radius: var(--radius-md); + border: none; + background: var(--accent-deep); + color: #fff; + font: inherit; + font-size: 13px; + font-weight: 600; + cursor: pointer; + transition: background var(--transition-fast), transform 0.1s; +} + +.s-apply:hover { background: var(--accent); } +.s-apply:active { transform: translateY(1px); } +.s-apply:disabled { opacity: 0.45; cursor: not-allowed; transform: none; } + +.s-note { font-size: 11px; color: var(--fg-mute); text-align: center; } +.s-upstream { font-size: 
13px; color: var(--fg-mute); text-align: center; padding: 20px 0; } +.s-feedback { font-size: 12px; text-align: center; min-height: 16px; } + +.h-new { + width: 100%; + padding: 8px; + border-radius: var(--radius-md); + border: 1px solid var(--border-subtle); + background: var(--bg-elev); + color: var(--fg); + font: inherit; + font-size: 13px; + font-weight: 600; + cursor: pointer; + transition: border-color var(--transition-fast), background var(--transition-fast), color var(--transition-fast), transform 0.1s; +} + +.h-new:hover { border-color: var(--accent); background: rgba(88, 166, 255, 0.08); color: var(--accent-bright); } +.h-new:active { transform: translateY(1px); } + +.h-actions { + display: flex; + gap: 8px; + margin: 10px 0 12px; +} + +.h-action { + flex: 1; + padding: 6px; + border-radius: var(--radius-md); + border: 1px solid var(--border-subtle); + background: var(--bg-elev); + color: var(--fg-mute); + font: inherit; + font-size: 12px; + cursor: pointer; + transition: border-color var(--transition-fast), color var(--transition-fast); +} + +.h-action:hover { border-color: var(--accent); color: var(--accent-bright); } + +.h-list { display: flex; flex-direction: column; gap: 8px; } +.h-empty { font-size: 13px; color: var(--fg-mute); text-align: center; padding: 24px 0; } + +.h-card { + position: relative; + background: var(--bg-elev); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-md); + padding: 12px; + cursor: pointer; + transition: border-color var(--transition-fast), background var(--transition-fast), transform 0.1s; + box-shadow: var(--shadow-sm); +} + +.h-card:hover { + border-color: rgba(88, 166, 255, 0.25); + background: rgba(88, 166, 255, 0.05); +} + +.h-card:active { transform: translateY(1px); } + +.h-card-title { + font-size: 13px; + font-weight: 600; + color: var(--fg); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + padding-right: 24px; +} + +.h-card-meta { + display: flex; + align-items: 
center; + justify-content: space-between; + margin-top: 4px; + font-size: 11px; + color: var(--fg-mute); +} + +.h-delete { + position: absolute; + top: 10px; + right: 10px; + width: 20px; + height: 20px; + border-radius: var(--radius-sm); + border: none; + background: transparent; + color: var(--fg-mute); + cursor: pointer; + display: flex; + align-items: center; + justify-content: center; + font-size: 13px; + line-height: 1; + transition: color var(--transition-fast), background var(--transition-fast); +} + +.h-delete:hover { color: var(--error); background: rgba(248, 81, 73, 0.08); } + +#chat-log.hidden { display: none; } + +@keyframes thinking-dots { + 0%, 100% { opacity: 0.3; transform: translateY(0); } + 50% { opacity: 1; transform: translateY(-2px); } +} + +.agent-pill { + display: inline-flex; + align-items: center; + gap: 8px; + align-self: flex-start; + font-size: 11px; + color: var(--fg-dim); + background: var(--bg-elev); + border: 1px solid var(--border-subtle); + border-radius: 999px; + padding: 5px 12px; + margin-bottom: 10px; + animation: agent-in 0.25s ease; +} + +@keyframes agent-in { + from { opacity: 0; transform: translateY(4px); } + to { opacity: 1; transform: translateY(0); } +} + +.agent-pill .spinner { + width: 12px; + height: 12px; + border: 2px solid var(--fg-dim); + border-top-color: transparent; + border-radius: 50%; + animation: spin 1s linear infinite; +} + +/* ------------------------------------------------------------------ + Agent log + ------------------------------------------------------------------ */ +#agent-log { + flex: 1; + width: 100%; + max-width: 900px; + overflow-y: auto; + padding: 16px 24px; + font-size: 14px; + line-height: 1.6; + color: var(--fg); + display: flex; + flex-direction: column; + gap: 12px; +} + +#agent-log.hidden { display: none; } + +.agent-log-line:empty, +.agent-assistant:empty { display: none; } + +.agent-ascii-bg { + position: fixed; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + 
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; + font-size: 12px; + line-height: 1.15; + white-space: pre; + color: var(--fg-dim); + opacity: 0.25; + pointer-events: none; + user-select: none; + z-index: 0; + transition: opacity 0.4s ease, color 0.4s ease; + max-width: 90vw; + max-height: 90vh; + overflow: hidden; + text-shadow: 0 0 20px rgba(0, 0, 0, 0.5); +} + +.agent-ascii-bg.state-thinking { opacity: 0.40; color: var(--accent-bright); } +.agent-ascii-bg.state-working { opacity: 0.35; color: var(--warn); } +.agent-ascii-bg.state-success { opacity: 0.35; color: var(--success); } +.agent-ascii-bg.state-error { opacity: 0.40; color: var(--error); } +.agent-ascii-bg.state-done { opacity: 0.35; color: var(--success); } + +#agent-log > *:not(.agent-ascii-bg) { position: relative; z-index: 1; } + +.agent-log-line { + position: relative; + padding: 10px 0 10px 18px; + border-left: 2px solid transparent; + animation: agent-fade-in 0.2s ease; + width: 100%; +} + +@keyframes agent-fade-in { + from { opacity: 0; transform: translateX(-4px); } + to { opacity: 1; transform: translateX(0); } +} + +.agent-log-line::before { + content: ""; + position: absolute; + left: -2px; + top: 0; + bottom: 0; + width: 2px; + background: var(--border-subtle); +} + +.agent-log-line.status::before { background: var(--fg-mute); opacity: 0.4; } +.agent-log-line.assistant::before { background: var(--accent); opacity: 0.5; } +.agent-log-line.tool::before { background: var(--warn); opacity: 0.5; } +.agent-log-line.error::before { background: var(--error); opacity: 0.5; } +.agent-log-line.done::before { background: var(--success); opacity: 0.5; } + +.agent-prompt { + color: var(--accent-bright); + margin-bottom: 12px; + padding-left: 18px; + border-left: 2px solid var(--accent); + animation: agent-fade-in 0.2s ease; + font-weight: 600; +} + +.agent-prompt::before { + content: "> "; + color: var(--accent-bright); + font-weight: 700; +} 
+ +.agent-status { color: var(--fg-mute); font-size: 12px; opacity: 0.9; } + +.agent-thinking { + display: flex; + align-items: center; + gap: 8px; + color: var(--fg-dim); + font-style: italic; +} + +.agent-thinking .dot { + width: 5px; + height: 5px; + border-radius: 50%; + background: var(--accent-bright); + animation: thinking-dots 1.2s ease-in-out infinite; +} + +.agent-thinking .dot:nth-child(2) { animation-delay: 0.15s; } +.agent-thinking .dot:nth-child(3) { animation-delay: 0.3s; } + +.agent-assistant { + color: var(--fg); + word-break: break-word; +} + +.agent-assistant p { margin: 0 0 10px; } +.agent-assistant p:last-child { margin-bottom: 0; } +.agent-assistant pre { + background: var(--bg-code); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-md); + padding: 12px 14px; + overflow-x: auto; + margin: 8px 0; + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; + font-size: 12px; +} +.agent-assistant code { + background: rgba(230, 237, 243, 0.08); + padding: 2px 5px; + border-radius: var(--radius-sm); + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; + font-size: 12px; + color: var(--accent-bright); +} +.agent-assistant ul, .agent-assistant ol { margin: 8px 0; padding-left: 22px; } +.agent-assistant li { margin: 3px 0; } +.agent-assistant > *:last-child { margin-bottom: 0; } +.agent-assistant .code-block-wrapper:last-child { margin-bottom: 0; } + +.agent-assistant pre { + background: var(--bg-code); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-md); + padding: 12px 14px; + overflow-x: auto; + margin: 8px 0 0; + font-family: inherit; + font-size: 12px; + white-space: pre-wrap; + word-break: break-word; +} + +.agent-error { color: var(--error); white-space: pre-wrap; word-break: break-word; } + +.agent-error pre { + background: transparent; + border: none; + padding: 0; + margin: 4px 0 
0; + color: inherit; + font-family: inherit; + font-size: 12px; + white-space: pre-wrap; + word-break: break-word; +} + +.agent-done { color: var(--fg-mute); font-size: 12px; display: inline-flex; align-items: center; gap: 6px; } +.agent-done svg { width: 14px; height: 14px; fill: currentColor; } + +.agent-tool { + display: flex; + flex-direction: column; + gap: 6px; + background: var(--bg-elev); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-md); + padding: 10px 12px; + margin: 4px 0 8px; +} + +.agent-tool.pending { border-color: rgba(210, 153, 34, 0.35); } +.agent-tool.success { border-color: rgba(63, 185, 80, 0.35); } +.agent-tool.error { border-color: rgba(248, 81, 73, 0.35); } +.agent-tool.confirm-required { border-color: rgba(210, 153, 34, 0.5); } + +.agent-tool-header { + display: flex; + align-items: center; + gap: 10px; + min-height: 22px; +} + +.agent-step-number { + width: 18px; + height: 18px; + display: flex; + align-items: center; + justify-content: center; + border-radius: 50%; + background: var(--border-subtle); + color: var(--fg-mute); + font-size: 10px; + font-weight: 700; + flex: 0 0 auto; +} + +.agent-tool.success .agent-step-number { background: rgba(63, 185, 80, 0.2); color: var(--success); } +.agent-tool.error .agent-step-number { background: rgba(248, 81, 73, 0.15); color: var(--error); } +.agent-tool.pending .agent-step-number { background: rgba(210, 153, 34, 0.15); color: var(--warn); } + +.agent-tool-icon { + width: 16px; + height: 16px; + display: flex; + align-items: center; + justify-content: center; + color: var(--warn); + flex: 0 0 auto; +} + +.agent-tool.success .agent-tool-icon { color: var(--success); } +.agent-tool.error .agent-tool-icon { color: var(--error); } +.agent-tool-icon svg { width: 14px; height: 14px; fill: currentColor; } + +.agent-tool-title { + display: flex; + align-items: center; + gap: 8px; + min-width: 0; + flex: 1 1 auto; +} + +.agent-tool-kind { + color: var(--fg); + font-weight: 600; 
+ font-size: 13px; +} + +.agent-tool-target { + color: var(--fg-dim); + font-size: 12px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; +} + +.agent-tool-status { + margin-left: auto; + display: flex; + align-items: center; + gap: 5px; + font-size: 11px; + color: var(--fg-mute); + font-weight: 600; +} + +.agent-tool.success .agent-tool-status { color: var(--success); } +.agent-tool.error .agent-tool-status { color: var(--error); } + +.agent-tool-status .spinner { + width: 11px; + height: 11px; + border: 2px solid currentColor; + border-top-color: transparent; + border-radius: 50%; + animation: spin 1s linear infinite; +} + +.agent-tool-status .status-icon { + width: 12px; + height: 12px; + display: flex; + align-items: center; + justify-content: center; +} + +.agent-tool-status .status-icon svg { width: 12px; height: 12px; fill: currentColor; } + +.agent-tool-details { margin-left: 0; border-left: none; padding-left: 28px; } + +.agent-tool-detail { margin-top: 4px; } + +.agent-tool-detail-toggle { + display: inline-flex; + align-items: center; + gap: 6px; + padding: 4px 8px; + border-radius: var(--radius-sm); + cursor: pointer; + user-select: none; + font-size: 11px; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.04em; + color: var(--fg-mute); + transition: background var(--transition-fast), color var(--transition-fast); +} + +.agent-tool-detail-toggle:hover { background: rgba(230, 237, 243, 0.05); color: var(--fg-dim); } + +.agent-tool-detail-toggle .chevron { + font-size: 8px; + transition: transform var(--transition-fast); + display: inline-block; +} + +.agent-tool-detail-toggle.open .chevron { transform: rotate(90deg); } + +.agent-tool-detail-body { + max-height: 0; + overflow: hidden; + transition: max-height 0.2s ease, padding 0.2s ease; + padding: 0 6px; +} + +.agent-tool-detail-body.open { + 
max-height: 800px; + padding: 6px; +} + +.agent-tool-detail-body pre { + background: var(--bg-code); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-md); + padding: 10px 12px; + margin: 0; + font-family: inherit; + font-size: 11px; + white-space: pre-wrap; + word-break: break-word; +} + +.agent-confirm-row { + display: flex; + align-items: center; + gap: 10px; + flex-wrap: wrap; +} + +.agent-confirm-actions { display: flex; gap: 6px; } + +.agent-confirm-btn { + appearance: none; + padding: 5px 12px; + border-radius: var(--radius-md); + border: 1px solid var(--border-subtle); + background: var(--bg-elev); + color: var(--fg-dim); + font-size: 12px; + font-weight: 600; + font-family: inherit; + cursor: pointer; + transition: background var(--transition-fast), border-color var(--transition-fast), color var(--transition-fast), transform 0.1s; + display: inline-flex; + align-items: center; + gap: 4px; +} + +.agent-confirm-btn:hover { background: var(--bg); color: var(--fg); } +.agent-confirm-btn:active { transform: translateY(1px); } +.agent-confirm-btn:disabled { opacity: 0.45; cursor: not-allowed; transform: none; } +.agent-confirm-btn svg { width: 14px; height: 14px; fill: currentColor; } + +.agent-confirm-btn.approve { + background: rgba(63, 185, 80, 0.10); + border-color: rgba(63, 185, 80, 0.25); + color: var(--success); +} + +.agent-confirm-btn.approve:hover { background: rgba(63, 185, 80, 0.16); border-color: rgba(63, 185, 80, 0.4); } + +.agent-confirm-btn.deny { + background: rgba(248, 81, 73, 0.08); + border-color: rgba(248, 81, 73, 0.25); + color: var(--error); +} + +.agent-confirm-btn.deny:hover { background: rgba(248, 81, 73, 0.14); border-color: rgba(248, 81, 73, 0.4); } + +.agent-confirm-btn .spinner { + width: 10px; + height: 10px; + border: 2px solid currentColor; + border-top-color: transparent; + border-radius: 50%; + animation: spin 1s linear infinite; +} + +.agent-thinking-block { + margin: 8px 0 10px; + border-left: 2px solid 
rgba(88, 166, 255, 0.25); + padding-left: 10px; +} + +.agent-thinking-toggle { + display: inline-flex; + align-items: center; + gap: 6px; + padding: 4px 8px; + border-radius: var(--radius-sm); + cursor: pointer; + user-select: none; + font-size: 11px; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.04em; + color: var(--fg-mute); + transition: background var(--transition-fast); +} + +.agent-thinking-toggle:hover { background: rgba(230, 237, 243, 0.05); } + +.agent-thinking-toggle .chevron { + font-size: 8px; + transition: transform var(--transition-fast); + display: inline-block; +} + +.agent-thinking-toggle.open .chevron { transform: rotate(90deg); } + +.agent-thinking-content { + font-size: 12px; + color: var(--fg-dim); + font-style: italic; + line-height: 1.55; + white-space: pre-wrap; + max-height: 0; + overflow: hidden; + transition: max-height 0.2s ease, padding 0.2s ease; + padding: 0 6px; +} + +.agent-thinking-content.open { + max-height: 2000px; + padding: 6px; +} + +/* ------------------------------------------------------------------ + Slash command palette + ------------------------------------------------------------------ */ +.command-palette { + position: absolute; + left: 0; + right: 0; + bottom: calc(100% + 8px); + background: var(--bg-elev); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-lg); + box-shadow: var(--shadow-lg); + padding: 6px; + z-index: 50; + display: none; + flex-direction: column; + gap: 2px; + max-height: 260px; + overflow-y: auto; +} + +.command-palette.open { display: flex; } + +.command-item { + display: flex; + align-items: center; + gap: 10px; + padding: 8px 10px; + border-radius: var(--radius-sm); + cursor: pointer; + font-size: 13px; + color: var(--fg); + transition: background var(--transition-fast); +} + +.command-item:hover, +.command-item.selected { background: rgba(88, 166, 255, 0.12); } + +.command-item .cmd-name { + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, 
Consolas, "Liberation Mono", "Courier New", monospace; + font-weight: 700; + color: var(--accent-bright); + min-width: 80px; +} + +.command-item .cmd-desc { color: var(--fg-dim); font-size: 12px; } +.command-item .cmd-hint { margin-left: auto; font-size: 11px; color: var(--fg-mute); } + +.command-hint { background: rgba(88, 166, 255, 0.05); border-color: rgba(88, 166, 255, 0.15); } +.command-hint .role { color: var(--accent-bright); } +.command-hint .content { font-size: 13px; color: var(--fg-dim); } +.command-hint .content strong { color: var(--fg); } +.command-hint .content code { + background: rgba(230, 237, 243, 0.08); + padding: 2px 5px; + border-radius: var(--radius-sm); + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; + font-size: 12px; +} + +/* ------------------------------------------------------------------ + Agent controls + ------------------------------------------------------------------ */ +.agent-controls { + display: flex; + align-items: center; + gap: 16px; + font-size: 12px; + color: var(--fg-dim); + margin-top: 10px; +} + +.agent-controls label { + display: flex; + align-items: center; + gap: 6px; + cursor: pointer; +} + +.agent-controls input[type="checkbox"] { accent-color: var(--accent); } + +.agent-controls input[type="number"] { + width: 50px; + background: var(--bg-elev); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-sm); + color: var(--fg); + padding: 3px 6px; + font: inherit; + transition: border-color var(--transition-fast), box-shadow var(--transition-fast); +} + +.agent-controls input[type="number"]:hover { border-color: var(--border); } + +.agent-controls input[type="number"]:focus { + outline: none; + border-color: var(--accent); + box-shadow: 0 0 0 2px rgba(88, 166, 255, 0.12); +} + +@media (max-width: 640px) { + header { flex-wrap: wrap; } + .meta { width: 100%; justify-content: flex-end; } + .model-bar { flex-wrap: wrap; } + .model-select 
{ min-width: 0; flex: 1; } + main { padding: 12px 16px; } + .input-area { padding: 10px 16px 16px; } + .ctx-meter { padding: 0 16px 10px; } + #settings-panel, #history-panel { width: 260px; } +} \ No newline at end of file diff --git a/src/arc_llama/static/chat.html b/src/arc_llama/static/chat.html index bb254d5..1e8a3a7 100644 --- a/src/arc_llama/static/chat.html +++ b/src/arc_llama/static/chat.html @@ -4,1430 +4,12 @@ Chat — arc-llama - + + + + +
@@ -1453,15 +35,16 @@

arc-llama

-
💬
+
Start a conversation
Select a model and send a message to begin chatting with arc-llama.
+
-
🤖
+
Agent mode
Switch to Agent mode, enter a task, and click Run to start a coding assistant session.
@@ -1511,6 +94,11 @@

arc-llama

Chat History
+
+ + + +
@@ -1529,1812 +117,7 @@

arc-llama

- + diff --git a/src/arc_llama/static/chat.js b/src/arc_llama/static/chat.js new file mode 100644 index 0000000..68dbab6 --- /dev/null +++ b/src/arc_llama/static/chat.js @@ -0,0 +1,2031 @@ +const $ = (sel) => document.querySelector(sel); +const chatLog = $("#chat-log"); +const emptyState = $("#empty-state"); +const modelSelect = $("#model-select"); +const modelStatus = $("#model-status"); +const statusText = $("#status-text"); +const input = $("#message-input"); +const sendButton = $("#send-button"); +const inputWrap = $("#input-wrap"); +const commandPalette = $("#command-palette"); +const attachButton = $("#attach-button"); +const pdfInput = $("#pdf-input"); +const attachmentStrip = $("#attachment-strip"); + +let models = []; +let selectedModel = null; +let loadingModel = null; +let generating = false; +let statusPoller = null; +let lastUsage = null; +let streamStartTime = null; +let streamTokenCount = 0; +const conversation = []; +let attachments = []; + +const agentLog = $("#agent-log"); +const agentEmptyState = $("#agent-empty-state"); +const modeToggle = $("#mode-toggle"); +const chatHint = $("#chat-hint"); +const agentControls = $("#agent-controls"); +const agentAutoConfirm = $("#agent-auto-confirm"); +const agentMaxTurns = $("#agent-max-turns"); + +let agentMode = false; +let agentRunning = false; +let agentAbort = null; + +const ctxMeter = $("#ctx-meter"); +const ctxBarFill = $("#ctx-bar-fill"); +const ctxLabelL = $("#ctx-label-left"); +const ctxLabelTps = $("#ctx-label-tps"); +const ctxLabelR = $("#ctx-label-right"); +const settingsToggle = $("#settings-toggle"); +const settingsPanel = $("#settings-panel"); +const sModelName = $("#s-model-name"); +const sFields = $("#s-fields"); +const sFeedback = $("#s-feedback"); + +const historyToggle = $("#history-toggle"); +const historyPanel = $("#history-panel"); +const hNew = $("#h-new"); +const hList = $("#h-list"); +const hExport = $("#h-export"); +const hImport = $("#h-import"); +const hImportInput = 
$("#h-import-input"); + +const HISTORY_KEY = "arc-llama-chats"; +const MAX_HISTORY = 50; + +// Configure Markdown renderer with syntax highlighting and safe defaults. +if (typeof marked !== "undefined") { + marked.use({ + gfm: true, + breaks: false, + headerIds: false, + mangle: false, + }); +} +const mdRenderer = { + code(code, language) { + const validLang = language && hljs.getLanguage(language) ? language : "plaintext"; + const highlighted = hljs.highlight(code, { language: validLang }).value; + const langLabel = validLang === "plaintext" ? "" : `${escapeHtml(validLang)}`; + return `
${langLabel}
${highlighted}
`; + }, + blockquote(quote) { + return `
${quote}
`; + }, + html(text) { + return escapeHtml(text); + }, +}; + +function attachCopyButtons(root) { + for (const btn of root.querySelectorAll(".copy-code-btn")) { + btn.addEventListener("click", async () => { + const code = btn.closest(".code-block-wrapper").querySelector("code"); + const text = code ? code.textContent : ""; + try { + await navigator.clipboard.writeText(text); + btn.classList.add("copied"); + btn.innerHTML = ``; + setTimeout(() => { + btn.classList.remove("copied"); + btn.innerHTML = ``; + }, 1500); + } catch (e) { + console.warn("Copy failed", e); + } + }); + } +} + +let currentChatId = null; +let chatCache = loadChatsFromStorage(); + +const KV_TYPES = ["f16","f32","q8_0","q5_1","q5_0","q4_1","q4_0"]; +const KV_CLASSES = ["default","moe_a3b","qwen3_27b_dense","gemma_swa"]; + +settingsToggle.addEventListener("click", () => { + const open = settingsPanel.classList.toggle("open"); + settingsToggle.classList.toggle("open", open); + if (open) renderSettingsPanel(); +}); + +historyToggle.addEventListener("click", () => { + const open = historyPanel.classList.toggle("open"); + historyToggle.classList.toggle("open", open); + if (open) renderHistoryPanel(); +}); + +hNew.addEventListener("click", newChat); + +if (hExport) hExport.addEventListener("click", exportChats); +if (hImport) hImport.addEventListener("click", () => hImportInput?.click()); +if (hImportInput) hImportInput.addEventListener("change", importChatsFromFile); + +function loadChatsFromStorage() { + try { + const raw = localStorage.getItem(HISTORY_KEY); + if (!raw) return []; + const parsed = JSON.parse(raw); + if (Array.isArray(parsed)) return parsed; + if (parsed && Array.isArray(parsed.chats)) return parsed.chats; + } catch (e) { + // storage may be full / disabled + } + return []; +} + +function loadChats() { + return chatCache; +} + +function saveChats(chats) { + chatCache = chats; + try { + localStorage.setItem(HISTORY_KEY, JSON.stringify(chats)); + } catch (e) { + // storage may be full / 
disabled + } +} + +function serverChatToLocal(data, modelHint) { + return { + id: data.id, + title: data.title || "New chat", + model: modelHint || null, + createdAt: Math.round((data.created_at || Date.now() / 1000) * 1000), + updatedAt: Math.round((data.updated_at || Date.now() / 1000) * 1000), + messages: (data.messages || []).map(m => ({ role: m.role, content: m.content })), + }; +} + +async function apiRequest(path, options = {}) { + const r = await fetch(path, options); + if (!r.ok) { + const t = await r.text(); + throw new Error(`${r.status} ${t}`); + } + return r.json(); +} + +async function syncChatsFromServer() { + try { + const data = await apiRequest("/v1/chats"); + const summaries = data.data || []; + const map = new Map(chatCache.map(c => [c.id, c])); + for (const s of summaries) { + const existing = map.get(s.id); + const updatedAt = Math.round((s.updated_at || 0) * 1000); + if (existing) { + existing.title = s.title; + existing.createdAt = Math.round((s.created_at || 0) * 1000); + existing.updatedAt = updatedAt; + existing.message_count = s.message_count; + } else { + map.set(s.id, { + id: s.id, + title: s.title, + model: null, + messages: [], + createdAt: Math.round((s.created_at || 0) * 1000), + updatedAt: updatedAt, + message_count: s.message_count, + }); + } + } + const merged = Array.from(map.values()).sort((a, b) => b.updatedAt - a.updatedAt).slice(0, MAX_HISTORY); + saveChats(merged); + if (historyPanel.classList.contains("open")) renderHistoryPanel(); + } catch (e) { + console.warn("Could not sync chats from server:", e.message); + } +} + +async function ensureServerChat(titleHint) { + if (currentChatId) return; + const title = truncateTitle(titleHint || "New chat"); + try { + const data = await apiRequest("/v1/chats", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ title }), + }); + currentChatId = data.id; + const now = Date.now(); + const chats = loadChats(); + 
chats.unshift(serverChatToLocal(data, selectedModel)); + chats[0].createdAt = now; + chats[0].updatedAt = now; + saveChats(chats); + } catch (e) { + console.warn("Could not create chat on server:", e.message); + // Local-only fallback so the UI keeps working offline. + const id = generateId(); + currentChatId = id; + const now = Date.now(); + const chats = loadChats(); + chats.unshift({ id, title, model: selectedModel, messages: [], createdAt: now, updatedAt: now }); + saveChats(chats); + } +} + +async function serverAppendMessages(chatId, messages, title) { + if (!chatId) return; + if ((!messages || messages.length === 0) && !title) return; + const body = {}; + if (messages && messages.length > 0) body.messages = messages; + if (title) body.title = title; + try { + await apiRequest(`/v1/chats/${encodeURIComponent(chatId)}`, { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); + } catch (e) { + console.warn("Could not append messages to server:", e.message); + } +} + +function generateId() { + if (typeof crypto !== "undefined" && crypto.randomUUID) { + return crypto.randomUUID(); + } + return Date.now().toString(36) + Math.random().toString(36).slice(2); +} + +function truncateTitle(text, max = 60) { + if (!text) return "New chat"; + const single = text.replace(/\s+/g, " ").trim(); + if (single.length <= max) return single || "New chat"; + return single.slice(0, max - 1).trimEnd() + "…"; +} + +function formatRelativeTime(ms) { + const now = Date.now(); + const diff = now - ms; + const sec = Math.floor(diff / 1000); + if (sec < 10) return "just now"; + if (sec < 60) return `${sec}s ago`; + const min = Math.floor(sec / 60); + if (min < 60) return `${min}m ago`; + const hr = Math.floor(min / 60); + if (hr < 24) return `${hr}h ago`; + const day = Math.floor(hr / 24); + if (day === 1) return "yesterday"; + if (day < 7) return `${day} days ago`; + const d = new Date(ms); + return d.toLocaleDateString(undefined, { 
month: "short", day: "numeric" }); +} + +function saveCurrentChat() { + if (conversation.length === 0) return; + const firstUser = conversation.find(m => m.role === "user"); + const title = truncateTitle(firstUser ? firstUser.content : "New chat"); + const chats = loadChats(); + const now = Date.now(); + if (currentChatId) { + const idx = chats.findIndex(c => c.id === currentChatId); + if (idx >= 0) { + chats[idx] = { ...chats[idx], title, model: selectedModel, messages: [...conversation], updatedAt: now }; + } else { + chats.unshift({ id: currentChatId, title, model: selectedModel, messages: [...conversation], createdAt: now, updatedAt: now }); + } + } else { + const id = generateId(); + currentChatId = id; + chats.unshift({ id, title, model: selectedModel, messages: [...conversation], createdAt: now, updatedAt: now }); + } + chats.sort((a, b) => b.updatedAt - a.updatedAt); + while (chats.length > MAX_HISTORY) chats.pop(); + saveChats(chats); + // Keep the server-side title in sync (best-effort). + if (currentChatId) { + serverAppendMessages(currentChatId, [], title).catch(() => {}); + } +} + +async function newChat() { + conversation.length = 0; + currentChatId = null; + chatLog.innerHTML = ""; + chatLog.appendChild(emptyState); + emptyState.style.display = ""; + input.value = ""; + input.style.height = "auto"; + historyPanel.classList.remove("open"); + historyToggle.classList.remove("open"); + updateCtxMeter(0, models.find(m => m.id === selectedModel)?.ctx || 131072); + ctxMeter.classList.remove("visible"); + ctxLabelTps.textContent = ""; + await ensureServerChat("New chat"); + input.focus(); +} + +function renderHistoryPanel() { + const chats = loadChats(); + hList.innerHTML = ""; + if (chats.length === 0) { + hList.innerHTML = '
No saved chats yet.
'; + return; + } + for (const c of chats) { + const card = document.createElement("div"); + card.className = "h-card"; + card.dataset.id = c.id; + const ICON_CLOSE = ''; + card.innerHTML = ` +
${escapeHtml(c.title)}
+
+ ${escapeHtml(c.model || "unknown")} + ${formatRelativeTime(c.updatedAt)} +
+ + `; + card.addEventListener("click", (e) => { + if (e.target.closest(".h-delete")) return; + loadChat(c.id); + }); + card.querySelector(".h-delete").addEventListener("click", (e) => { + e.stopPropagation(); + deleteChat(c.id); + }); + hList.appendChild(card); + } +} + +async function loadChat(id) { + let chat = chatCache.find(c => c.id === id); + if (!chat) { + try { + const data = await apiRequest(`/v1/chats/${encodeURIComponent(id)}`); + chat = serverChatToLocal(data, null); + chatCache.push(chat); + saveChats(chatCache); + } catch (e) { + console.warn("Could not load chat from server:", e.message); + return; + } + } else if (!chat.messages || chat.messages.length === 0) { + try { + const data = await apiRequest(`/v1/chats/${encodeURIComponent(id)}`); + const updated = serverChatToLocal(data, chat.model); + const idx = chatCache.findIndex(c => c.id === id); + if (idx >= 0) chatCache[idx] = updated; else chatCache.push(updated); + saveChats(chatCache); + chat = updated; + } catch (e) { + console.warn("Could not load chat details from server:", e.message); + } + } + if (!chat) return; + conversation.length = 0; + if (Array.isArray(chat.messages)) { + conversation.push(...chat.messages); + } + currentChatId = chat.id; + chatLog.innerHTML = ""; + if (conversation.length === 0) { + chatLog.appendChild(emptyState); + emptyState.style.display = ""; + } else { + for (const m of conversation) { + if (m.role === "assistant") { + const { div, content } = createMessage("assistant", m.content || ""); + if (m.thinking) renderThinking(div, m.thinking); + if (m.content) renderMarkdown(content, m.content); + } else { + createMessage(m.role, m.content || ""); + } + } + } + if (chat.model && models.some(m => m.id === chat.model)) { + selectedModel = chat.model; + modelSelect.value = chat.model; + loadingModel = null; + updatePickerStatus(); + } + historyPanel.classList.remove("open"); + historyToggle.classList.remove("open"); + const m = models.find(x => x.id === selectedModel); 
+ updateCtxMeter(estimateTokens(), m?.ctx || 131072); + if (shouldAutoScroll(chatLog)) autoScroll(chatLog); + input.focus(); +} + +async function deleteChat(id) { + try { + await apiRequest(`/v1/chats/${encodeURIComponent(id)}`, { method: "DELETE" }); + } catch (e) { + console.warn("Could not delete chat on server:", e.message); + } + const chats = loadChats().filter(c => c.id !== id); + saveChats(chats); + if (currentChatId === id) { + currentChatId = null; + } + renderHistoryPanel(); +} + +async function exportChats() { + try { + const data = await apiRequest("/v1/chats/export"); + const blob = new Blob([JSON.stringify(data, null, 2)], { type: "application/json" }); + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = url; + a.download = `arc-llama-chats-${new Date().toISOString().slice(0, 10)}.json`; + document.body.appendChild(a); + a.click(); + a.remove(); + URL.revokeObjectURL(url); + } catch (e) { + console.warn("Could not export chats:", e.message); + showError("Export failed: " + e.message); + } +} + +async function importChatsFromFile() { + const file = hImportInput.files?.[0]; + if (!file) return; + hImportInput.value = ""; + let body; + try { + const text = await file.text(); + body = JSON.parse(text); + } catch (e) { + showError("Import failed: invalid JSON file"); + return; + } + const chats = body.chats; + if (!Array.isArray(chats)) { + showError("Import failed: missing 'chats' array"); + return; + } + try { + const r = await fetch("/v1/chats/import", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ chats, overwrite: false }), + }); + const data = await r.json().catch(() => ({})); + if (!r.ok) throw new Error(data.detail || `HTTP ${r.status}`); + await syncChatsFromServer(); + showError(`Imported ${data.imported || 0}, skipped ${data.skipped || 0}, errors ${data.errors || 0}.`); + } catch (e) { + showError("Import failed: " + e.message); + } +} + +function 
renderSettingsPanel() { + const m = models.find(m => m.id === selectedModel); + sModelName.textContent = selectedModel || "—"; + if (!m || (m.owned_by && m.owned_by.startsWith("upstream:"))) { + sFields.innerHTML = '
Settings not available for upstream models.
'; + return; + } + const ctx = m.ctx ?? 32768; + const ctk = m.cache_type_k ?? "q8_0"; + const ctv = m.cache_type_v ?? "q8_0"; + const parallel = m.parallel ?? 1; + const kvClass = m.kv_class ?? "default"; + + const kvOpts = KV_TYPES.map(v => ``).join(""); + const kvOptsV = KV_TYPES.map(v => ``).join(""); + const classOpts = KV_CLASSES.map(v => ``).join(""); + + sFields.innerHTML = ` +
+
+
+
+
+
+
+
+
+
+ +
Takes effect on next model load.
+ `; + $("#s-apply").addEventListener("click", applySettings); +} + +async function applySettings() { + const m = models.find(m => m.id === selectedModel); + if (!m) return; + const btn = $("#s-apply"); + btn.disabled = true; + sFeedback.textContent = ""; + const body = { + ctx: parseInt($("#s-ctx").value, 10), + cache_type_k: $("#s-ctk").value, + cache_type_v: $("#s-ctv").value, + parallel: parseInt($("#s-par").value, 10), + kv_class: $("#s-kvc").value, + }; + try { + const r = await fetch(`/admin/models/${encodeURIComponent(selectedModel)}/edit`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); + const data = await r.json(); + if (!r.ok) throw new Error(data.detail || r.status); + m.ctx = body.ctx; + m.cache_type_k = body.cache_type_k; + m.cache_type_v = body.cache_type_v; + m.parallel = body.parallel; + m.kv_class = body.kv_class; + sFeedback.style.color = "var(--accent-bright)"; + sFeedback.textContent = "Saved."; + } catch (e) { + sFeedback.style.color = "#e8b0b0"; + sFeedback.textContent = "Error: " + e.message; + } finally { + btn.disabled = false; + } +} + +function estimateTokens() { + const chars = conversation.reduce((n, m) => n + (m.content || "").length, 0); + return Math.round(chars / 4); +} + +function updateCtxMeter(tokens, ctx) { + if (!ctx) return; + const pct = Math.min(100, tokens / ctx * 100); + ctxBarFill.style.width = pct.toFixed(1) + "%"; + ctxBarFill.className = "ctx-bar-fill" + (pct >= 90 ? " critical" : pct >= 70 ? 
" warn" : ""); + ctxLabelL.textContent = `${tokens.toLocaleString()} / ${ctx.toLocaleString()} tokens`; + ctxLabelR.textContent = pct.toFixed(1) + "%"; + ctxMeter.classList.add("visible"); +} + +async function fetchModels() { + try { + const r = await fetch("/v1/models"); + if (!r.ok) throw new Error(`status ${r.status}`); + const data = await r.json(); + const local = (data.data || []).filter(m => m.object === "model" && m.owned_by !== "arc-llama-alias"); + models = local; + renderModelPicker(); + } catch (e) { + showError("Could not fetch models: " + e.message); + } +} + +function renderModelPicker() { + const current = selectedModel || modelSelect.value; + modelSelect.innerHTML = ""; + if (models.length === 0) { + const opt = document.createElement("option"); + opt.textContent = "No models available"; + opt.disabled = true; + opt.selected = true; + modelSelect.appendChild(opt); + selectedModel = null; + updateStatus("swapping"); + return; + } + for (const m of models) { + const opt = document.createElement("option"); + opt.value = m.id; + opt.textContent = m.id; + modelSelect.appendChild(opt); + } + if (current && models.some(m => m.id === current)) { + modelSelect.value = current; + selectedModel = current; + } else { + selectedModel = models[0].id; + modelSelect.value = selectedModel; + } +} + +async function fetchStatus() { + try { + const r = await fetch("/admin/status"); + if (!r.ok) return; + const data = await r.json(); + const modelMap = new Map((data.models || []).map(m => [m.name, m])); + models = models.map(m => { + const s = modelMap.get(m.id); + if (s) { + m.loaded = s.loaded; + m.ctx = s.ctx ?? m.ctx; + m.cache_type_k = s.cache_type_k ?? m.cache_type_k; + m.cache_type_v = s.cache_type_v ?? m.cache_type_v; + m.kv_class = s.kv_class ?? 
m.kv_class; + } + return m; + }); + updatePickerStatus(); + } catch (e) { + // silent — the chat endpoint will surface real errors + } +} + +function updatePickerStatus() { + const m = models.find(m => m.id === selectedModel); + if (!m) { + updateStatus("swapping"); + return; + } + if (loadingModel === selectedModel) { + updateStatus("loading"); + } else if (m.loaded) { + updateStatus("ready"); + } else { + updateStatus("swapping"); + } +} + +function updateStatus(state) { + modelStatus.className = "model-status " + state; + statusText.textContent = state; +} + +modelSelect.addEventListener("change", () => { + selectedModel = modelSelect.value; + loadingModel = null; + updatePickerStatus(); + if (settingsPanel.classList.contains("open")) renderSettingsPanel(); +}); + +function createMessage(role, text = "") { + if (emptyState) emptyState.style.display = "none"; + const div = document.createElement("div"); + div.className = "message " + role; + const roleLabel = document.createElement("div"); + roleLabel.className = "role"; + roleLabel.textContent = role === "user" ? "You" : role === "system" ? 
"System" : "Assistant"; + div.appendChild(roleLabel); + if (role === "assistant") { + const indicator = document.createElement("span"); + indicator.id = "streaming-indicator"; + indicator.textContent = "●"; + indicator.style.color = "var(--accent-bright)"; + indicator.style.opacity = "0"; + roleLabel.appendChild(indicator); + const thinkingBlock = document.createElement("div"); + thinkingBlock.className = "thinking-block"; + thinkingBlock.style.display = "none"; + const thinkingToggle = document.createElement("div"); + thinkingToggle.className = "thinking-toggle"; + thinkingToggle.innerHTML = 'Thinking'; + thinkingToggle.addEventListener("click", () => { + thinkingToggle.classList.toggle("open"); + thinkingContent.classList.toggle("open"); + }); + const thinkingContent = document.createElement("div"); + thinkingContent.className = "thinking-content"; + thinkingBlock.appendChild(thinkingToggle); + thinkingBlock.appendChild(thinkingContent); + div.appendChild(thinkingBlock); + } + const content = document.createElement("div"); + content.className = "content"; + content.textContent = text; + div.appendChild(content); + chatLog.appendChild(div); + if (shouldAutoScroll(chatLog)) autoScroll(chatLog); + return { div, content }; +} + +function showError(text) { + const { content } = createMessage("error", text); + content.parentElement.classList.add("error-card"); + content.parentElement.querySelector(".role").textContent = "Error"; +} + +async function ensureModelLoaded() { + const m = models.find(m => m.id === selectedModel); + if (!m) throw new Error("No model selected"); + if (m.loaded || (m.owned_by && m.owned_by.startsWith("upstream:"))) return; + loadingModel = selectedModel; + updatePickerStatus(); + try { + const r = await fetch(`/admin/load/${encodeURIComponent(selectedModel)}`, { method: "POST" }); + if (!r.ok) { + const t = await r.text(); + throw new Error(`Load failed: ${r.status} ${t}`); + } + m.loaded = true; + } catch (e) { + throw e; + } finally { + 
loadingModel = null; + updatePickerStatus(); + } +} + +async function sendMessage() { + const text = input.value.trim(); + if (generating || !selectedModel) return; + if (!text && !hasReadyAttachments()) return; + if (hasProcessingAttachments()) { + showError("Please wait for attachments to finish processing."); + return; + } + + const attachmentText = buildAttachmentText(); + const fullText = text + ? attachmentText ? `${text}\n\n${attachmentText}` : text + : attachmentText; + + input.value = ""; + input.style.height = "auto"; + clearAttachments(); + conversation.push({ role: "user", content: fullText }); + createMessage("user", fullText); + + // Persist this conversation on the server (best-effort). + await ensureServerChat(fullText); + serverAppendMessages(currentChatId, [{ role: "user", content: fullText }]); + + generating = true; + sendButton.disabled = true; + inputWrap.classList.add("generating"); + const streamingDot = $("#streaming-indicator"); + if (streamingDot) streamingDot.style.opacity = "1"; + + try { + await ensureModelLoaded(); + } catch (e) { + showError(e.message); + finishGeneration(); + return; + } + + const assistantMsg = createMessage("assistant"); + conversation.push({ role: "assistant", content: "", thinking: "" }); + const convoIndex = conversation.length - 1; + + let streamRaw = ""; + let lastDisplayedContent = ""; + let currentThinking = ""; + + try { + const r = await fetch("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: selectedModel, + messages: conversation.slice(0, -1), + stream: true, + stream_options: { include_usage: true }, + }), + }); + if (!r.ok) { + const t = await r.text(); + throw new Error(`${r.status} ${t}`); + } + const reader = r.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + 
const lines = buffer.split("\n"); + buffer = lines.pop(); + for (const line of lines) { + const chunkText = processSseLine(line); + if (chunkText != null) { + if (streamStartTime === null) streamStartTime = Date.now(); + streamTokenCount += Math.max(1, Math.round(chunkText.length / 4)); + streamRaw += chunkText; + const parsed = parseThinking(streamRaw); + if (!parsed.hasPartialTag) { + const newContent = parsed.content.slice(lastDisplayedContent.length); + lastDisplayedContent = parsed.content; + currentThinking = parsed.thinking; + if (newContent) { + conversation[convoIndex].content = parsed.content; + appendChunk(assistantMsg.content, newContent); + if (shouldAutoScroll(chatLog)) autoScroll(chatLog); + } + const thinkingBlock = assistantMsg.div.querySelector(".thinking-block"); + const thinkingContent = assistantMsg.div.querySelector(".thinking-content"); + if (thinkingBlock && thinkingContent) { + const trimmedThinking = currentThinking.trim(); + if (trimmedThinking) { + thinkingBlock.style.display = ""; + thinkingContent.textContent = trimmedThinking; + } else { + thinkingBlock.style.display = "none"; + thinkingContent.textContent = ""; + } + } + } + } + } + } + const finalParsed = parseThinking(streamRaw); + conversation[convoIndex].content = finalParsed.content; + conversation[convoIndex].thinking = finalParsed.thinking; + renderMarkdown(assistantMsg.content, finalParsed.content); + const m = models.find(m => m.id === selectedModel); + const completionToks = lastUsage ? lastUsage.completion_tokens : streamTokenCount; + const totalToks = lastUsage ? lastUsage.total_tokens : estimateTokens(); + const elapsed = streamStartTime ? (Date.now() - streamStartTime) / 1000 : null; + const tps = (elapsed && elapsed > 0 && completionToks > 0) + ? 
// Tag names that delimit model "thinking" output inside the raw stream.
const _THINK_TAGS = ["think", "thinking"];

/**
 * Extract the text carried by one server-sent-events line.
 *
 * Side effect: when the payload has a `usage` object it is stored in the
 * file-level `lastUsage` variable.
 *
 * @param {string} line One line of the SSE response body.
 * @returns {string|null} Text to append to the raw stream, with any
 *   `reasoning_content` wrapped in <think>…</think>; null for non-data lines,
 *   the "[DONE]" sentinel, malformed JSON, or chunks without text.
 */
function processSseLine(line) {
  const trimmed = line.trim();
  if (!trimmed || !trimmed.startsWith("data:")) return null;
  const payload = trimmed.slice(5).trim();
  if (payload === "[DONE]") return null;
  try {
    const obj = JSON.parse(payload);
    if (obj.usage) lastUsage = obj.usage;
    const delta = obj.choices?.[0]?.delta;
    if (!delta) return null;
    let text = "";
    if (delta.reasoning_content) {
      // Reasoning is tagged so parseThinking() can separate it from the answer.
      text += "<think>" + delta.reasoning_content + "</think>";
    }
    if (delta.content != null) {
      text += delta.content;
    }
    return text || null;
  } catch (e) {
    // Malformed payloads are skipped rather than aborting the stream.
    return null;
  }
}

/**
 * Split raw model output into hidden "thinking" text and visible content.
 *
 * @param {string} text Raw accumulated output, possibly containing
 *   <think>/<thinking> blocks (closed, or still open at the end).
 * @returns {{thinking: string, content: string, hasPartialTag: boolean}}
 *   `hasPartialTag` is true when the text ends in the middle of a tag; in that
 *   case `content` is the unmodified input and nothing should be rendered yet.
 */
function parseThinking(text) {
  // A tag may be split across chunks; the longest tag is 11 characters, so
  // only the last few characters need inspecting.
  const tail = text.slice(-15);
  const lastLt = tail.lastIndexOf("<");
  if (lastLt !== -1) {
    const fragment = tail.slice(lastLt);
    for (const tag of _THINK_TAGS) {
      for (const full of [`<${tag}>`, `</${tag}>`]) {
        if (full.startsWith(fragment) && fragment.length < full.length) {
          return { thinking: "", content: text, hasPartialTag: true };
        }
      }
    }
  }

  // processSseLine() wraps every reasoning delta in its own block, so a
  // streamed thought arrives as many back-to-back blocks. Merge those first,
  // otherwise each fragment would be joined with a newline below.
  let merged = text;
  for (const tag of _THINK_TAGS) {
    merged = merged.split(`</${tag}><${tag}>`).join("");
  }

  const parts = [];
  let content = merged;
  for (const tag of _THINK_TAGS) {
    const closed = new RegExp(`<${tag}>([\\s\\S]*?)</${tag}>`, "g");
    for (const m of merged.matchAll(closed)) parts.push(m[1]);
    content = content.replace(closed, "");
  }

  // A block that is still open runs to the end of the text.
  for (const tag of _THINK_TAGS) {
    const open = content.match(new RegExp(`<${tag}>([\\s\\S]*)$`));
    if (open) {
      parts.push(open[1]);
      content = content.slice(0, open.index);
      break;
    }
  }

  return { thinking: parts.join("\n").trim(), content: content.trimEnd(), hasPartialTag: false };
}

/**
 * Append a piece of streamed text to a message as its own <span>.
 *
 * The "revealed" class is added on the next animation frame.
 *
 * @param {Element} container Element receiving the chunk.
 * @param {string} text Text of the chunk (inserted as plain text).
 */
function appendChunk(container, text) {
  const span = document.createElement("span");
  span.className = "token-chunk";
  span.textContent = text;
  container.appendChild(span);
  requestAnimationFrame(() => span.classList.add("revealed"));
}

\s*<\/p>/g, "").replace(/

// Characters that must be replaced before text is placed inside HTML markup.
const _HTML_ESCAPE_MAP = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&#39;",
};

/**
 * Escape text for safe interpolation into an HTML string.
 *
 * All five characters are handled in a single pass, so an "&" produced by one
 * replacement can never be escaped a second time.
 *
 * @param {*} s Value to escape; coerced with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(s) {
  return String(s).replace(/[&<>"']/g, (ch) => _HTML_ESCAPE_MAP[ch]);
}

/**
 * Fill the thinking block of a message and hide it when there is no text.
 *
 * @param {Element} messageDiv Message element that may contain a
 *   ".thinking-block" child.
 * @param {string} thinkingText Thinking text; surrounding whitespace is trimmed.
 */
function renderThinking(messageDiv, thinkingText) {
  const thinkingBlock = messageDiv.querySelector(".thinking-block");
  if (!thinkingBlock) return;
  const thinkingContent = thinkingBlock.querySelector(".thinking-content");
  const trimmed = String(thinkingText || "").trim();
  if (thinkingContent) thinkingContent.textContent = trimmed;
  thinkingBlock.style.display = trimmed ? "" : "none";
}

/**
 * Return the chat UI to its idle state after a generation ends.
 *
 * Re-enables sending, hides the streaming indicator, collapses every open
 * thinking section, hides thinking blocks without text and focuses the input.
 */
function finishGeneration() {
  generating = false;
  sendButton.disabled = false;
  inputWrap.classList.remove("generating");
  const indicator = $("#streaming-indicator");
  if (indicator) indicator.style.opacity = "0";
  // Collapse any thinking blocks that are open and hide empty ones
  const openThinking = document.querySelectorAll(".thinking-toggle.open");
  for (const t of openThinking) {
    t.classList.remove("open");
    t.nextElementSibling?.classList.remove("open");
  }
  for (const block of document.querySelectorAll(".thinking-block")) {
    const content = block.querySelector(".thinking-content");
    if (content && !content.textContent.trim()) {
      block.style.display = "none";
    }
  }
  input.focus();
}

// ------------------------------------------------------------------
// File attachments
// ------------------------------------------------------------------

const TEXT_EXTENSIONS = new Set([".txt", ".md", ".py", ".json", ".yaml", ".yml", ".csv"]);

/**
 * Decide whether a file can be read directly as text.
 *
 * @param {{type: string, name: string}} file File (or File-like object).
 * @returns {boolean} True for any "text/*" MIME type or a name ending in one
 *   of TEXT_EXTENSIONS (compared case-insensitively).
 */
function isTextFile(file) {
  if (file.type.startsWith("text/")) return true;
  const name = file.name.toLowerCase();
  return [...TEXT_EXTENSIONS].some((ext) => name.endsWith(ext));
}
/**
 * Read a file's contents as text via FileReader.
 *
 * @param {Blob} file File to read.
 * @returns {Promise<string>} The file text ("" when the reader yields nothing).
 */
function _readFileAsText(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(String(reader.result || ""));
    reader.onerror = () => reject(new Error("Could not read file"));
    reader.readAsText(file);
  });
}

/**
 * Upload a PDF to `/admin/parse-pdf` and return the text the server extracted.
 *
 * @param {Blob} file PDF file to upload as the multipart field "file".
 * @returns {Promise<string>} Extracted text, or "" when the response has none.
 * @throws {Error} With the response's `detail`, or "HTTP <status>", on a
 *   non-OK response.
 */
async function _extractPdfText(file) {
  const form = new FormData();
  form.append("file", file);
  const r = await fetch("/admin/parse-pdf", { method: "POST", body: form });
  // A body that is not JSON is treated as an empty object.
  const data = await r.json().catch(() => ({}));
  if (!r.ok) throw new Error(data.detail || `HTTP ${r.status}`);
  return data.text || "";
}

/**
 * Populate an attachment record with the text of its file.
 *
 * PDFs are parsed by the server, text files are read locally, anything else
 * is rejected. Never throws: failures are recorded on the record itself.
 *
 * @param {{file: File, text: string, processing: boolean, error: string}} a
 *   Attachment record, mutated in place. On success `text` is set and `error`
 *   cleared; on failure `error` holds the message and `text` is emptied.
 *   `processing` is always reset to false.
 * @returns {Promise<void>}
 */
async function processAttachment(a) {
  try {
    let extracted;
    if (isPdfFile(a.file)) {
      extracted = await _extractPdfText(a.file);
    } else if (isTextFile(a.file)) {
      extracted = await _readFileAsText(a.file);
    } else {
      throw new Error("Unsupported file type");
    }
    a.text = extracted;
    a.error = "";
  } catch (e) {
    a.error = e.message;
    a.text = "";
  } finally {
    a.processing = false;
  }
}
/** Drop every pending attachment and redraw the (now empty) strip. */
function clearAttachments() {
  attachments = [];
  renderAttachments();
}

/**
 * Build the text block that carries attachment contents in a message.
 *
 * Attachments that failed, are still processing, or have no text are skipped.
 *
 * @returns {string} "[Attachment: <name>]\n<trimmed text>" sections separated
 *   by blank lines; "" when nothing is usable.
 */
function buildAttachmentText() {
  return attachments
    .filter((a) => !(a.error || a.processing || !a.text))
    .map((a) => `[Attachment: ${a.file.name}]\n${a.text.trim()}`)
    .join("\n\n");
}

/**
 * @returns {boolean} True when at least one attachment finished processing
 *   without error and has text.
 */
function hasReadyAttachments() {
  for (const a of attachments) {
    if (a.processing || a.error) continue;
    if (a.text) return true;
  }
  return false;
}

/** @returns {boolean} True while any attachment is still being processed. */
function hasProcessingAttachments() {
  return attachments.findIndex((a) => a.processing) !== -1;
}
/**
 * Classify a tool name into one of the categories the UI distinguishes.
 *
 * Substring checks run first and in a fixed order (search, write, read), so a
 * name containing several keywords resolves to the earliest one.
 *
 * @param {string} name Tool name; a missing name is treated as "".
 * @returns {"search"|"write"|"read"|"list"|"command"|"other"}
 */
function _toolCategory(name) {
  const n = String(name ?? "");
  if (n.includes("search")) return "search";
  if (n.includes("write")) return "write";
  if (n.includes("read")) return "read";
  if (n === "list_directory") return "list";
  if (n === "run_command") return "command";
  return "other";
}

/**
 * Icon markup for a tool card.
 *
 * @param {string} name Tool name.
 * @returns {string} One of the file-level ICON_* strings; ICON_BOT when the
 *   tool is not recognised.
 */
function getToolIcon(name) {
  switch (_toolCategory(name)) {
    case "search": return ICON_SEARCH;
    case "write": return ICON_WRITE;
    case "read": return ICON_READ;
    case "list": return ICON_LIST;
    case "command": return ICON_COMMAND;
    default: return ICON_BOT;
  }
}

/**
 * Human-readable label for a tool card.
 *
 * @param {string} name Tool name.
 * @returns {string} "Search", "Write file", "Read file", "List directory",
 *   "Shell", or "Tool" when the tool is not recognised.
 */
function getToolKind(name) {
  switch (_toolCategory(name)) {
    case "search": return "Search";
    case "write": return "Write file";
    case "read": return "Read file";
    case "list": return "List directory";
    case "command": return "Shell";
    default: return "Tool";
  }
}
const LLAMA_WIDTH = 24;

/**
 * Pad a string with spaces on both sides so it sits in the middle of `width`.
 *
 * When the padding is odd the extra space goes on the right. Text that is
 * already `width` characters or longer is returned unpadded (never truncated).
 *
 * @param {*} str Text to centre; falsy values are treated as "".
 * @param {number} width Target width in characters.
 * @returns {string}
 */
function _padCenter(str, width) {
  const s = String(str || "");
  const pad = Math.max(width - s.length, 0);
  const left = Math.floor(pad / 2);
  return " ".repeat(left) + s + " ".repeat(pad - left);
}

/**
 * Centre one row of mascot art.
 *
 * @param {string} str Row text.
 * @param {number} [width=LLAMA_WIDTH] Row width in characters.
 * @returns {string} The padded row.
 */
function center(str, width = LLAMA_WIDTH) {
  return _padCenter(str, width);
}

/**
 * Build one boxed row: `content` centred in `innerWidth` characters between
 * the `left` and `right` border strings, the whole row then centred with
 * center().
 *
 * @param {string} left Left border text.
 * @param {number} innerWidth Width reserved for the content.
 * @param {string} content Text inside the box; falsy values are treated as "".
 * @param {string} right Right border text.
 * @returns {string} The padded row.
 */
function llamaBox(left, innerWidth, content, right) {
  return center(left + _padCenter(content, innerWidth) + right);
}
}, + ], + read: [ + { eyes: "v v", mouth: "____", panel1: "READING", panel2: "[= ]" }, + { eyes: "v v", mouth: "____", panel1: "READING", panel2: "[ = ]" }, + { eyes: "v v", mouth: "____", panel1: "READING", panel2: "[ =]" }, + { eyes: "v v", mouth: "____", panel1: "READING", panel2: "[ = ]" }, + ], + write: [ + { eyes: "o o", mouth: "____", panel1: "WRITING", panel2: "code_" }, + { eyes: "o o", mouth: "____", panel1: "WRITING", panel2: "code " }, + ], + command: [ + { eyes: "o o", mouth: "____", panel1: "$ EXEC", panel2: "/" }, + { eyes: "o o", mouth: "____", panel1: "$ EXEC", panel2: "-" }, + { eyes: "o o", mouth: "____", panel1: "$ EXEC", panel2: "\\" }, + { eyes: "o o", mouth: "____", panel1: "$ EXEC", panel2: "|" }, + ], + search: [ + { eyes: "O O", mouth: "____", panel1: "SEARCH", panel2: "[o ]" }, + { eyes: "O O", mouth: "____", panel1: "SEARCH", panel2: "[ o ]" }, + { eyes: "O O", mouth: "____", panel1: "SEARCH", panel2: "[ o ]" }, + { eyes: "O O", mouth: "____", panel1: "SEARCH", panel2: "[ o]" }, + ], + list: [ + { eyes: "o o", mouth: "____", panel1: "FILES", panel2: "* - -" }, + { eyes: "o o", mouth: "____", panel1: "FILES", panel2: "- * -" }, + { eyes: "o o", mouth: "____", panel1: "FILES", panel2: "- - *" }, + ], + working: [ + { eyes: "o o", mouth: "____", panel1: "WORKING", panel2: "/" }, + { eyes: "o o", mouth: "____", panel1: "WORKING", panel2: "-" }, + { eyes: "o o", mouth: "____", panel1: "WORKING", panel2: "\\" }, + { eyes: "o o", mouth: "____", panel1: "WORKING", panel2: "|" }, + ], + success: [ + { eyes: "^ ^", mouth: "\\__/", panel1: "DONE", panel2: "" }, + { eyes: "^ ^", mouth: "\\__/", panel1: "DONE", panel2: "\\o/" }, + ], + done: [ + { eyes: "^ ^", mouth: "\\__/", panel1: "DONE", panel2: "" }, + { eyes: "^ ^", mouth: "\\__/", panel1: "DONE", panel2: "\\o/" }, + ], + error: [ + { eyes: "x x", mouth: "/??\\", panel1: "ERROR", panel2: "!!!" 
/**
 * Tell whether a scrollable element is close enough to its bottom that new
 * output should keep it pinned there.
 *
 * @param {{scrollHeight: number, scrollTop: number, clientHeight: number}|null} container
 *   Scrollable element; a missing container counts as "yes".
 * @returns {boolean} True when at most 60 pixels remain below the viewport.
 */
function shouldAutoScroll(container) {
  const NEAR_BOTTOM_PX = 60;
  if (container) {
    const { scrollHeight, scrollTop, clientHeight } = container;
    const distanceFromBottom = scrollHeight - scrollTop - clientHeight;
    return distanceFromBottom <= NEAR_BOTTOM_PX;
  }
  return true;
}

/**
 * Scroll an element all the way to its bottom.
 *
 * @param {{scrollHeight: number, scrollTop: number}|null} container
 *   Scrollable element; nothing happens when it is missing.
 */
function autoScroll(container) {
  if (container) {
    container.scrollTop = container.scrollHeight;
  }
}

Thinking
`; + agentLog.appendChild(el); + if (shouldAutoScroll(agentLog)) autoScroll(agentLog); + this.thinkingEl = el; + setAgentAscii("thinking"); + } else { + if (this.thinkingEl) { + this.thinkingEl.remove(); + this.thinkingEl = null; + } + } + }, + + addEvent(event) { + if (agentEmptyState) agentEmptyState.style.display = "none"; + switch (event.type) { + case "status": + this.renderStatus(event); + break; + case "assistant": + this.renderAssistant(event); + break; + case "tool_call": + this.renderToolCall(event); + break; + case "tool_result": + this.renderToolResult(event); + break; + case "confirm_required": + this.renderConfirm(event); + break; + case "error": + this.renderError(event); + break; + case "done": + this.renderDone(event); + break; + default: + this.renderRaw(event); + } + if (shouldAutoScroll(agentLog)) autoScroll(agentLog); + }, + + renderStatus(event) { + const el = document.createElement("div"); + el.className = "agent-log-line status"; + el.textContent = "# " + (event.message || ""); + agentLog.appendChild(el); + }, + + renderAssistant(event) { + this.setThinking(false); + const parsed = parseThinking(event.content || ""); + const line = document.createElement("div"); + line.className = "agent-log-line assistant"; + + let html = ""; + if (parsed.thinking) { + html += ` +
+
Thinking
+
+
`; + } + html += `
`; + line.innerHTML = html; + agentLog.appendChild(line); + + const thinkingContent = line.querySelector(".agent-thinking-content"); + if (thinkingContent && parsed.thinking) { + thinkingContent.textContent = parsed.thinking; + } + const assistantContent = line.querySelector(".agent-assistant"); + if (assistantContent) { + renderMarkdown(assistantContent, parsed.content); + } + + const toggle = line.querySelector(".agent-thinking-toggle"); + if (toggle) { + toggle.addEventListener("click", () => makeToggle(line, ".agent-thinking-toggle", ".agent-thinking-content")); + } + }, + + _makeToolDetail(label, content, open = false) { + const detail = document.createElement("div"); + detail.className = "agent-tool-detail"; + const trimmed = String(content || "").trimEnd(); + detail.innerHTML = ` +
${escapeHtml(label)}
+
${escapeHtml(trimmed)}
`; + const toggle = detail.querySelector(".agent-tool-detail-toggle"); + toggle.addEventListener("click", () => makeToggle(detail, ".agent-tool-detail-toggle", ".agent-tool-detail-body")); + return detail; + }, + + _asciiForTool(name) { + if (name.includes("search")) return "search"; + if (name.includes("write")) return "write"; + if (name.includes("read")) return "read"; + if (name === "list_directory") return "list"; + if (name === "run_command") return "command"; + return "working"; + }, + + renderToolCall(event) { + this.setThinking(false); + this.stepNumber += 1; + const id = event.id || `orphan-${Date.now()}-${Math.random().toString(36).slice(2)}`; + const name = event.name || "unknown"; + const args = event.arguments || {}; + const icon = getToolIcon(name); + const kind = getToolKind(name); + const target = this._toolTarget(name, args); + + setAgentAscii(this._asciiForTool(name)); + + const line = document.createElement("div"); + line.className = "agent-log-line tool agent-tool pending"; + line.dataset.callId = id; + line.innerHTML = ` +
+ ${this.stepNumber} + ${icon} + + ${escapeHtml(kind)} + ${target ? `${escapeHtml(target)}` : ""} + + Running… +
+
`; + agentLog.appendChild(line); + + const details = line.querySelector(".agent-tool-details"); + details.appendChild(this._makeToolDetail("Arguments", JSON.stringify(args, null, 2), false)); + this.toolCards.set(id, { el: line, step: this.stepNumber }); + }, + + _toolTarget(name, args) { + if (args.path) return String(args.path); + if (args.file_path) return String(args.file_path); + if (args.directory) return String(args.directory); + if (args.command) { + const cmd = String(args.command); + return cmd.length > 60 ? cmd.slice(0, 57) + "…" : cmd; + } + if (args.query) { + const q = String(args.query); + return q.length > 60 ? q.slice(0, 57) + "…" : q; + } + return ""; + }, + + renderToolResult(event) { + const id = event.id; + let card = id ? this.toolCards.get(id) : null; + if (!card) { + const pending = [...this.toolCards.values()].find((c) => c.el.classList.contains("pending")); + card = pending; + } + + if (!card) { + const line = document.createElement("div"); + line.className = "agent-log-line tool agent-tool" + (event.error ? " error" : " success"); + line.innerHTML = ` +
+ ${event.error ? ICON_X : ICON_CHECK} + ${escapeHtml(event.name || "")} + ${event.error ? "Failed" : "Done"} +
+
`; + agentLog.appendChild(line); + const details = line.querySelector(".agent-tool-details"); + details.appendChild(this._makeToolDetail("Result", event.content || "", true)); + return; + } + + const line = card.el; + line.classList.remove("pending"); + line.classList.add(event.error ? "error" : "success"); + + const status = line.querySelector(".agent-tool-status"); + if (status) { + status.innerHTML = event.error + ? `${ICON_X} Failed` + : `${ICON_CHECK} Done`; + } + + const iconWrap = line.querySelector(".agent-tool-icon"); + if (iconWrap) iconWrap.innerHTML = event.error ? ICON_X : getToolIcon(event.name || ""); + + const details = line.querySelector(".agent-tool-details"); + const existingResult = details.querySelector(".agent-tool-result"); + if (existingResult) existingResult.remove(); + const resultDetail = this._makeToolDetail("Result", event.content || "", true); + resultDetail.classList.add("agent-tool-result"); + details.appendChild(resultDetail); + + setAgentAscii(event.error ? "error" : "success"); + this.setThinking(true); + }, + + renderConfirm(event) { + this.setThinking(false); + const runId = event.run_id; + const id = event.id; + const tool = event.tool || "unknown"; + const args = event.arguments || {}; + + let card = id ? this.toolCards.get(id) : null; + + if (!card) { + // Build an inline confirmation line when no preceding tool_call card exists. + const line = document.createElement("div"); + line.className = "agent-log-line tool agent-tool confirm-required"; + line.dataset.runId = runId || ""; + line.dataset.callId = id || ""; + line.innerHTML = ` +
+ ${ICON_WARN} + ${escapeHtml(tool)} needs approval + + + + +
+
`; + agentLog.appendChild(line); + const details = line.querySelector(".agent-tool-details"); + details.appendChild(this._makeToolDetail("Arguments", JSON.stringify(args, null, 2), false)); + card = { el: line }; + } else { + // Replace the running status on the existing tool card with confirm buttons. + const line = card.el; + line.classList.add("confirm-required"); + const status = line.querySelector(".agent-tool-status"); + if (status) { + status.innerHTML = ` + + + + `; + } + line.dataset.runId = runId || ""; + } + + if (!runId) return; + const line = card.el; + const approveBtn = line.querySelector('.agent-confirm-btn[data-action="approve"]'); + const denyBtn = line.querySelector('.agent-confirm-btn[data-action="deny"]'); + + const submitConfirmation = async (approved) => { + if (approveBtn) approveBtn.disabled = true; + if (denyBtn) denyBtn.disabled = true; + const spinner = ''; + const label = approved ? "Allowing…" : "Denying…"; + const clicked = approved ? approveBtn : denyBtn; + if (clicked) clicked.innerHTML = spinner + " " + label; + try { + const r = await fetch(`/v1/agent/${encodeURIComponent(runId)}/confirm`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ approved }), + }); + if (!r.ok) { + const t = await r.text(); + throw new Error(`${r.status} ${t}`); + } + } catch (e) { + if (approveBtn) { + approveBtn.disabled = false; + approveBtn.innerHTML = `${ICON_CHECK} Allow`; + } + if (denyBtn) { + denyBtn.disabled = false; + denyBtn.innerHTML = `${ICON_X} Deny`; + } + this.renderError({ message: "Confirmation failed: " + e.message }); + } + }; + + if (approveBtn) approveBtn.addEventListener("click", () => submitConfirmation(true)); + if (denyBtn) denyBtn.addEventListener("click", () => submitConfirmation(false)); + }, + + renderError(event) { + this.setThinking(false); + setAgentAscii("error"); + const line = document.createElement("div"); + line.className = "agent-log-line error"; + line.innerHTML = `
Error: ${escapeHtml(event.message || "")}
`; + agentLog.appendChild(line); + }, + + renderDone(event) { + this.setThinking(false); + setAgentAscii("done"); + const line = document.createElement("div"); + line.className = "agent-log-line done"; + line.innerHTML = `
${ICON_CHECK} Agent finished.
`; + agentLog.appendChild(line); + }, + + renderRaw(event) { + this.setThinking(false); + const line = document.createElement("div"); + line.className = "agent-log-line"; + line.innerHTML = `
${escapeHtml(JSON.stringify(event, null, 2))}
`; + agentLog.appendChild(line); + } +}; + +async function runAgentTask() { + const task = input.value.trim(); + if (!task || agentRunning || !selectedModel) return; + + input.value = ""; + input.style.height = "auto"; + agentRenderer.clear(); + agentRenderer.renderPrompt(task); + agentRunning = true; + sendButton.disabled = true; + inputWrap.classList.add("generating"); + + // Ensure model is loaded + try { + await ensureModelLoaded(); + } catch (e) { + agentRenderer.addEvent({ type: "error", message: e.message }); + finishAgentRun(); + return; + } + + const autoConfirm = agentAutoConfirm.checked; + const maxTurns = parseInt(agentMaxTurns.value, 10) || 30; + + const abortController = new AbortController(); + agentAbort = () => abortController.abort(); + + agentRenderer.addEvent({ type: "status", message: `Running task with ${autoConfirm ? "auto-confirm" : "manual confirmation"}` }); + agentRenderer.setThinking(true); + + let agentStartTime = null; + let agentTokenCount = 0; + + try { + const r = await fetch("/v1/agent", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: selectedModel, + task: task, + auto_confirm: autoConfirm, + max_turns: maxTurns, + }), + signal: abortController.signal, + }); + if (!r.ok) { + const t = await r.text(); + throw new Error(`${r.status} ${t}`); + } + const reader = r.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + buffer = lines.pop(); + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed.startsWith("data:")) continue; + const payload = trimmed.slice(5).trim(); + if (payload === "[DONE]") continue; + try { + const event = JSON.parse(payload); + agentRenderer.addEvent(event); + if (event.type === "assistant" && event.content) { + if (agentStartTime === null) 
/**
 * Split slash-command input into the command name and its argument text.
 *
 * @param {string} text Raw input text.
 * @returns {{command: string, rest: string, raw: string}|null} Null when the
 *   trimmed text does not start with "/". Otherwise `command` is the
 *   lower-cased word after the slash, `rest` the trimmed remainder after the
 *   first whitespace character, and `raw` the trimmed input.
 */
function parseSlashCommand(text) {
  const raw = text.trim();
  if (raw[0] !== "/") return null;
  // Group 1: everything up to the first whitespace; group 2: whatever follows it.
  const [, name, args = ""] = /^\/(\S*)(?:\s([\s\S]*))?$/.exec(raw);
  return { command: name.toLowerCase(), rest: args.trim(), raw };
}

/**
 * List the slash commands whose name begins with a prefix.
 *
 * @param {string} prefix Text typed after the slash; matched case-insensitively.
 * @returns {Array<Object>} Matching SLASH_COMMANDS entries, in their original order.
 */
function getFilteredCommands(prefix) {
  const needle = prefix.toLowerCase();
  const matches = [];
  for (const cmd of SLASH_COMMANDS) {
    if (cmd.name.startsWith(needle)) matches.push(cmd);
  }
  return matches;
}
/**
 * Show or hide the command palette to match the current input text.
 *
 * The palette is shown only outside agent mode, while the input starts with
 * "/" and contains no space yet; the text after the slash is the filter.
 */
function updateCommandPalette() {
  const text = input.value;
  const typingCommand = !agentMode && text.startsWith("/") && !text.includes(" ");
  if (typingCommand) {
    renderCommandPalette(text.slice(1));
  } else {
    hideCommandPalette();
  }
}

/**
 * Run the slash command contained in the input text, if there is one.
 *
 * @param {string} rawText Text the user submitted.
 * @returns {boolean} False when the text is not a slash command (the caller
 *   should treat it as a normal message); true when it was handled here,
 *   including the case of an unknown command, which is reported as an error.
 */
function executeSlashCommand(rawText) {
  const parsed = parseSlashCommand(rawText);
  if (!parsed) return false;

  const known = SLASH_COMMANDS.find((c) => c.name === parsed.command);
  if (!known) {
    // showError() inserts its message via textContent, so the command must be
    // passed through as-is; escaping it here would display literal entities.
    showError(`Unknown command: /${parsed.command}. Type /help for available commands.`);
    return true;
  }

  switch (parsed.command) {
    case "help":
      renderHelpMessage();
      break;
    case "clear":
      clearChat();
      break;
    case "new":
      newChat();
      break;
    case "model":
      switchModel(parsed.rest);
      break;
    case "compact":
      compactConversation(parsed.rest);
      break;
  }
  return true;
}

/${cmd.name} ${escapeHtml(cmd.desc)}

`; + } + content.innerHTML = html; + div.appendChild(content); + chatLog.appendChild(div); + if (shouldAutoScroll(chatLog)) autoScroll(chatLog); +} + +function clearChat() { + conversation.length = 0; + chatLog.innerHTML = ""; + if (emptyState) emptyState.style.display = ""; + updateCtxMeter(0, models.find((m) => m.id === selectedModel)?.ctx || 131072); + ctxLabelTps.textContent = ""; + saveCurrentChat(); +} + +function switchModel(modelId) { + if (!modelId) { + showError("Usage: /model "); + return; + } + const m = models.find((x) => x.id === modelId || x.id.endsWith("/" + modelId) || (x.display_name && x.display_name.toLowerCase() === modelId.toLowerCase())); + if (!m) { + showError(`Model not found: ${escapeHtml(modelId)}`); + return; + } + selectedModel = m.id; + modelSelect.value = m.id; + updatePickerStatus(); + if (settingsPanel.classList.contains("open")) renderSettingsPanel(); + ensureModelLoaded().catch((e) => showError(e.message)); +} + +async function compactConversation(instruction) { + if (conversation.length === 0) { + showError("Nothing to compact."); + return; + } + if (!selectedModel) { + showError("Select a model first."); + return; + } + try { + await ensureModelLoaded(); + } catch (e) { + showError(e.message); + return; + } + + const systemPrompt = instruction + ? `Summarize the following conversation concisely. Focus on: ${instruction}. Preserve key facts, decisions, code snippets, and user intent. Return only the summary.` + : "Summarize the following conversation concisely. Preserve key facts, decisions, code snippets, and user intent. 
Return only the summary."; + + const summaryMsg = { role: "system", content: systemPrompt }; + const messages = [summaryMsg, ...conversation]; + + try { + const r = await fetch("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: selectedModel, messages, stream: false }), + }); + if (!r.ok) { + const t = await r.text(); + throw new Error(`${r.status} ${t}`); + } + const data = await r.json(); + const summary = data.choices?.[0]?.message?.content?.trim(); + if (!summary) { + throw new Error("Model returned an empty summary."); + } + + conversation.length = 0; + conversation.push({ role: "system", content: "Summary of prior conversation:\n\n" + summary }); + + chatLog.innerHTML = ""; + if (emptyState) emptyState.style.display = "none"; + const msg = createMessage("system", "Context compacted. Summary:\n\n" + summary); + if (shouldAutoScroll(chatLog)) autoScroll(chatLog); + + const m = models.find((x) => x.id === selectedModel); + updateCtxMeter(estimateTokens(), m?.ctx || 131072); + saveCurrentChat(); + } catch (e) { + showError("Compact failed: " + e.message); + } +} + +input.addEventListener("keydown", (e) => { + if (commandPalette.classList.contains("open")) { + const items = commandPalette.querySelectorAll(".command-item"); + if (e.key === "ArrowDown") { + e.preventDefault(); + paletteSelectedIndex = (paletteSelectedIndex + 1) % items.length; + renderCommandPalette(input.value.slice(1)); + return; + } + if (e.key === "ArrowUp") { + e.preventDefault(); + paletteSelectedIndex = (paletteSelectedIndex - 1 + items.length) % items.length; + renderCommandPalette(input.value.slice(1)); + return; + } + if (e.key === "Escape") { + e.preventDefault(); + hideCommandPalette(); + return; + } + if (e.key === "Enter" && !e.shiftKey) { + e.preventDefault(); + const selected = items[paletteSelectedIndex]; + if (selected) selected.click(); + return; + } + } + + if (e.key === "Enter" && !e.shiftKey) { + 
e.preventDefault(); + const text = input.value.trim(); + if (!agentMode && executeSlashCommand(text)) { + input.value = ""; + input.style.height = "auto"; + hideCommandPalette(); + return; + } + if (agentMode) { + runAgentTask(); + } else { + sendMessage(); + } + } +}); + +sendButton.addEventListener("click", () => { + const text = input.value.trim(); + if (!agentMode && executeSlashCommand(text)) { + input.value = ""; + input.style.height = "auto"; + hideCommandPalette(); + return; + } + if (agentMode) { + runAgentTask(); + } else { + sendMessage(); + } +}); + +input.addEventListener("input", () => { + input.style.height = "auto"; + input.style.height = Math.min(input.scrollHeight, 96) + "px"; + updateCommandPalette(); +}); + +(async function init() { + await fetchModels(); + await syncChatsFromServer(); +})(); +statusPoller = setInterval(fetchStatus, 3000); \ No newline at end of file diff --git a/src/arc_llama/tui.py b/src/arc_llama/tui.py index ae8128f..1efa109 100644 --- a/src/arc_llama/tui.py +++ b/src/arc_llama/tui.py @@ -11,6 +11,7 @@ """ from __future__ import annotations +from pathlib import Path from typing import Any import httpx @@ -245,7 +246,7 @@ def _render(self, s: dict[str, Any]) -> None: status = "[b]LOADED[/]" if loaded else "[dim]idle[/]" kv = f"{m.get('cache_type_k') or '?'}/{m.get('cache_type_v') or '?'}" path = m.get("path") or "—" - short = "/".join(p for p in path.split("/") if p)[-50:] + short = "/".join(Path(path).parts)[-50:] row_text = (status, m["name"], m.get("gpu_pci_slot", "?"), str(m.get("port") or "?"), str(m.get("ctx") or "?"), kv, short) if loaded: diff --git a/tests/conftest.py b/tests/conftest.py index 4b15adf..2a4060c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,6 @@ from __future__ import annotations +import sys from pathlib import Path import pytest @@ -9,7 +10,16 @@ @pytest.fixture def make_sysfs_gpu(tmp_path: Path): - """Factory fixture that creates fake sysfs PCI device entries.""" + """Factory 
fixture that creates fake sysfs PCI device entries. + + Simulates Linux's /sys/bus/pci/devices/ layout, which only exists + on Linux. The slot name contains colons (e.g. "0000:03:00.0"), which are + illegal in Windows path components, so this is skipped there rather than + rewritten — there's no Windows sysfs equivalent to fake. + """ + if sys.platform == "win32": + pytest.skip("simulates Linux-only /sys/bus/pci layout") + def _make(slot: str, device_id: int = 0xE211, vram_bytes: int | None = None, driver: str = "xe"): base = tmp_path / "sys" / "bus" / "pci" / "devices" / slot base.mkdir(parents=True) diff --git a/tests/test_agent_tools.py b/tests/test_agent_tools.py index 81b2da4..516bb2a 100644 --- a/tests/test_agent_tools.py +++ b/tests/test_agent_tools.py @@ -1,6 +1,7 @@ """Tests for the agent tool sandbox.""" from __future__ import annotations +import sys from pathlib import Path from unittest.mock import AsyncMock, MagicMock @@ -78,7 +79,11 @@ def test_run_command_echo(tmp_root: Path) -> None: def test_run_command_cwd_is_root(tmp_root: Path) -> None: - res = run_command("pwd", tmp_root) + # Use the interpreter's own cwd report rather than a shell builtin like + # `pwd` — on the Windows CI runner that resolves to Git Bash's pwd.exe, + # which prints an MSYS-style path ("/c/Users/...") instead of a native + # Windows one, even though the actual cwd is correct. 
+ res = run_command(f'"{sys.executable}" -c "import os; print(os.getcwd())"', tmp_root) assert not res.error assert str(tmp_root) in res.content diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index afd441f..d59ce09 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -1,7 +1,7 @@ """Tests for arc_llama.benchmark — measurement, formatting, sweep.""" from __future__ import annotations -import json +import sys from pathlib import Path from typing import Any @@ -59,6 +59,11 @@ def test_find_drm_card_no_sys(self, tmp_path: Path, monkeypatch: pytest.MonkeyPa monkeypatch.setattr("arc_llama.benchmark.Path", lambda p: tmp_path / p) assert _find_drm_card("0000:03:00.0") is None + @pytest.mark.skipif( + sys.platform == "win32", + reason="fakes a Linux /sys/bus/pci/devices/ path; colon in the " + "slot name is illegal on Windows and there's no sysfs to fake there", + ) def test_find_drm_card_found(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch): drm = tmp_path / "sys" / "class" / "drm" card = drm / "card1" diff --git a/tests/test_config.py b/tests/test_config.py index 0b5d778..d851395 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,5 +1,9 @@ from __future__ import annotations +from pathlib import Path + +import pytest + from arc_llama.config import Config, GPUConfig, ModelConfig, load_config @@ -57,3 +61,39 @@ def test_find_model_matches_name_alias_display_name_and_filename(tmp_path): assert cfg.find_model("qwen 3").name == "qwen" assert cfg.find_model("Q4_K_M").name == "qwen" assert cfg.find_model("missing") is None + + +def test_windows_default_paths_use_appdata(monkeypatch): + import os + + from arc_llama import config as config_mod + + monkeypatch.setattr(config_mod.sys, "platform", "win32") + monkeypatch.setenv("APPDATA", r"C:\Users\test\AppData\Roaming") + monkeypatch.setenv("LOCALAPPDATA", r"C:\Users\test\AppData\Local") + appdata = Path(os.environ["APPDATA"]) + localappdata = Path(os.environ["LOCALAPPDATA"]) + 
assert config_mod.default_config_path() == appdata / "arc-llama" / "config.toml" + assert config_mod.default_models_dir() == localappdata / "arc-llama" / "models" + assert config_mod.default_state_dir() == localappdata / "arc-llama" + + +def test_migrate_config_adds_missing_sections(): + from arc_llama.config import CONFIG_VERSION, migrate_config + + raw = migrate_config({}) + assert raw["version"] == CONFIG_VERSION + assert raw["server"] == {} + assert raw["paths"] == {} + assert raw["gpus"] == [] + assert raw["models"] == [] + assert raw["upstreams"] == [] + + +def test_validate_config_rejects_bad_structure(): + from arc_llama.config import validate_config + + with pytest.raises(ValueError, match="version"): + validate_config({"version": "not-an-int"}) + with pytest.raises(ValueError, match="gpus"): + validate_config({"version": 1, "gpus": {}}) diff --git a/tests/test_detect.py b/tests/test_detect.py index 60f5da9..0ee9acd 100644 --- a/tests/test_detect.py +++ b/tests/test_detect.py @@ -12,7 +12,6 @@ _enrich_with_clinfo, _parse_clinfo_devices, _scan_pci, - detect_gpus, lspci_intel_gpus, ) diff --git a/tests/test_launcher.py b/tests/test_launcher.py index 24a8544..180da4e 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -2,6 +2,7 @@ from __future__ import annotations import os +import subprocess from pathlib import Path import pytest @@ -191,7 +192,6 @@ async def test_wait_ready_true_when_healthy(self, monkeypatch: pytest.MonkeyPatc srv.started_at = 0.0 import httpx - original_get = httpx.AsyncClient.get async def _fake_get(self, url): if "/health" in url: @@ -225,3 +225,146 @@ def test_stop_idempotent(self): # Should not raise when not running srv.stop() assert srv.is_running is False + + +class TestLogHandling: + def test_log_rotation_renames_existing_large_log(self, tmp_path): + from arc_llama import launcher as launcher_mod + + log_dir = tmp_path / "logs" + log_dir.mkdir() + log_path = log_dir / "m.log" + log_path.write_bytes(b"x" * 
(launcher_mod._MAX_LOG_BYTES + 1)) + launcher_mod._rotate_log(log_path) + assert not log_path.exists() + assert (log_dir / "m.log.1").exists() + + def test_tail_log_returns_last_lines(self, tmp_path): + from arc_llama.config import Config, GPUConfig, ModelConfig + + plan = build_plan( + Config(paths=type("P", (), {"llama_server": "/bin/llama-server"})()), + ModelConfig(name="m", path="/m.gguf", port=18080, gpu_pci_slot="00:00.0"), + GPUConfig(pci_slot="00:00.0", sycl_index=0, arch="battlemage"), + ) + srv = LlamaServer(plan) + log_dir = tmp_path / "logs" + original_popen = subprocess.Popen + + def _fake_popen(*args, **kwargs): + class FakeProc: + pid = 12345 + def poll(self): + return None + def send_signal(self, sig): + pass + def wait(self, timeout): + self._waited = True + return FakeProc() + + subprocess.Popen = _fake_popen + try: + srv.start(log_dir=log_dir) + srv._log_file.write(b"line1\nline2\nline3\n") + srv._log_file.flush() + assert srv.tail_log(lines=2) == "line2\nline3" + finally: + subprocess.Popen = original_popen + srv.stop() + + +class TestWindowsLifecycle: + def test_start_uses_create_new_process_group(self, monkeypatch, tmp_path): + from arc_llama import launcher as launcher_mod + + monkeypatch.setattr(launcher_mod, "_IS_WINDOWS", True) + plan = build_plan( + Config(paths=type("P", (), {"llama_server": "/bin/llama-server"})()), + ModelConfig(name="m", path="/m.gguf", port=18080, gpu_pci_slot="00:00.0"), + GPUConfig(pci_slot="00:00.0", sycl_index=0, arch="battlemage"), + ) + srv = LlamaServer(plan) + log_dir = tmp_path / "logs" + called = {} + original_popen = subprocess.Popen + + def _fake_popen(*args, **kwargs): + called["kwargs"] = kwargs + class FakeProc: + pid = 12345 + def poll(self): + return None + return FakeProc() + + subprocess.Popen = _fake_popen + try: + srv.start(log_dir=log_dir) + finally: + subprocess.Popen = original_popen + assert called["kwargs"]["creationflags"] == getattr( + subprocess, "CREATE_NEW_PROCESS_GROUP", 0 + ) + 
assert "preexec_fn" not in called["kwargs"] + + def test_stop_sends_ctrl_break_then_force_kills_tree_on_timeout(self, monkeypatch): + from arc_llama import launcher as launcher_mod + + monkeypatch.setattr(launcher_mod, "_IS_WINDOWS", True) + plan = build_plan( + Config(paths=type("P", (), {"llama_server": "/bin/llama-server"})()), + ModelConfig(name="m", path="/m.gguf", port=18080, gpu_pci_slot="00:00.0"), + GPUConfig(pci_slot="00:00.0", sycl_index=0, arch="battlemage"), + ) + srv = LlamaServer(plan) + calls = [] + + class FakeProc: + pid = 12345 + def poll(self): + return None + def send_signal(self, sig): + calls.append(("send_signal", sig)) + def wait(self, timeout): + if not any(c[0] == "taskkill" for c in calls): + raise subprocess.TimeoutExpired("cmd", timeout) + + def _fake_run(cmd, **kwargs): + calls.append(("taskkill", cmd)) + return subprocess.CompletedProcess(cmd, 0) + + monkeypatch.setattr(subprocess, "run", _fake_run) + srv.process = FakeProc() + srv.stop(drain_seconds=0.1) + assert calls[0] == ("send_signal", launcher_mod._CTRL_BREAK_EVENT) + assert calls[1][0] == "taskkill" + assert calls[1][1] == ["taskkill", "/F", "/T", "/PID", "12345"] + + def test_stop_skips_force_kill_when_ctrl_break_succeeds(self, monkeypatch): + from arc_llama import launcher as launcher_mod + + monkeypatch.setattr(launcher_mod, "_IS_WINDOWS", True) + plan = build_plan( + Config(paths=type("P", (), {"llama_server": "/bin/llama-server"})()), + ModelConfig(name="m", path="/m.gguf", port=18080, gpu_pci_slot="00:00.0"), + GPUConfig(pci_slot="00:00.0", sycl_index=0, arch="battlemage"), + ) + srv = LlamaServer(plan) + calls = [] + + class FakeProc: + pid = 12345 + def poll(self): + return None + def send_signal(self, sig): + calls.append(("send_signal", sig)) + def wait(self, timeout): + return None + + def _fake_run(cmd, **kwargs): + calls.append(("taskkill", cmd)) + return subprocess.CompletedProcess(cmd, 0) + + monkeypatch.setattr(subprocess, "run", _fake_run) + srv.process = 
FakeProc() + srv.stop(drain_seconds=0.1) + assert calls == [("send_signal", launcher_mod._CTRL_BREAK_EVENT)] diff --git a/tests/test_recipes.py b/tests/test_recipes.py index c05a0af..0d417f8 100644 --- a/tests/test_recipes.py +++ b/tests/test_recipes.py @@ -1,8 +1,6 @@ """Tests for arc_llama.recipes — VRAM math, recipe generation, KV sizing.""" from __future__ import annotations -import pytest - from arc_llama.arch import Arch from arc_llama.recipes import ( DEFAULT_CTX_CAP, diff --git a/tests/test_router.py b/tests/test_router.py index b960e3d..ebeaadc 100644 --- a/tests/test_router.py +++ b/tests/test_router.py @@ -1,5 +1,7 @@ from __future__ import annotations +import pytest + from arc_llama.router import Router @@ -62,3 +64,39 @@ async def test_multi_resident_policy_keeps_models_on_different_gpus_running(tmp_ assert FakeServer.starts == ["qwen", "gemma"] assert FakeServer.stops == [] + + +async def test_vram_guard_refuses_oversized_model(tmp_path, monkeypatch): + from conftest import make_config + + import arc_llama.router as router_mod + + FakeServer.starts = [] + FakeServer.stops = [] + cfg = make_config(tmp_path, single_resident=False) + monkeypatch.setattr(router_mod, "LlamaServer", FakeServer) + monkeypatch.setattr(router_mod, "_estimate_model_vram_mb", lambda m: 999_999) + rt = Router(cfg) + + with pytest.raises(RuntimeError, match="needs ~"): + await rt.ensure_active("qwen") + assert FakeServer.starts == [] + + +async def test_metrics_increment_on_load_and_stop(tmp_path, monkeypatch): + from conftest import make_config + + import arc_llama.router as router_mod + + FakeServer.starts = [] + FakeServer.stops = [] + cfg = make_config(tmp_path, single_resident=False) + monkeypatch.setattr(router_mod, "LlamaServer", FakeServer) + rt = Router(cfg) + + await rt.ensure_active("qwen") + assert rt.metrics["loads"] == 1 + assert rt.metrics["load_errors"] == 0 + + await rt.stop_one("qwen") + assert rt.metrics["stops"] == 1 diff --git a/tests/test_server.py 
b/tests/test_server.py index 12187fe..6d02464 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -27,6 +27,13 @@ def __init__(self, cfg, log_dir=None): aliases=["qwen.gguf"], ) self._servers = {"qwen": FakeBackend()} + self.metrics = { + "loads": 5, + "stops": 2, + "load_errors": 1, + "last_load_at": 1234.0, + "last_error": None, + } def all_models(self): return [self.model] @@ -370,8 +377,40 @@ def test_upstream_streaming_proxy_forwards_sse_and_closes_upstream(monkeypatch): assert response.status_code == 200 assert body == b"data: upstream chunk 1\n\ndata: upstream chunk 2\n\n" assert response.headers["content-type"].startswith("text/event-stream") - assert response.headers["x-upstream"] == "stream-ok" - assert "transfer-encoding" not in response.headers - assert "content-length" not in response.headers - assert mgr.last_streaming_ok is True - assert mgr.last_stream.closed is True + + +def test_health_includes_loaded_models_and_uptime(monkeypatch): + import arc_llama.server as server_mod + + monkeypatch.setattr(server_mod, "Router", FakeRouter) + monkeypatch.setattr(server_mod, "UpstreamManager", FakeUpstreamManager) + monkeypatch.setattr(server_mod.httpx, "AsyncClient", FakeAsyncClient) + app = create_app() + + with TestClient(app) as client: + r = client.get("/health") + assert r.status_code == 200 + data = r.json() + assert data["status"] == "ok" + assert data["loaded_model_count"] == 1 + assert "qwen" in data["loaded_models"] + assert data["uptime_seconds"] >= 0 + + +def test_admin_metrics_returns_counters_and_gpus(monkeypatch): + import arc_llama.server as server_mod + + monkeypatch.setattr(server_mod, "Router", FakeRouter) + monkeypatch.setattr(server_mod, "UpstreamManager", FakeUpstreamManager) + monkeypatch.setattr(server_mod.httpx, "AsyncClient", FakeAsyncClient) + app = create_app() + + with TestClient(app) as client: + r = client.get("/admin/metrics") + assert r.status_code == 200 + data = r.json() + assert data["loads"] == 5 + assert 
data["stops"] == 2 + assert data["load_errors"] == 1 + assert data["active_models"] == ["qwen"] + assert any(g["pci_slot"] == "0000:03:00.0" for g in data["gpus"]) diff --git a/tests/test_server_chats.py b/tests/test_server_chats.py index afe968f..352aed7 100644 --- a/tests/test_server_chats.py +++ b/tests/test_server_chats.py @@ -115,3 +115,42 @@ def test_search_endpoint(tmp_path, monkeypatch): assert len(data) == 1 assert data[0]["chat"]["id"] == "chat-1" assert data[0]["matching_message_indices"] == [-1, 0] + + +def test_export_and_import_chats(tmp_path, monkeypatch): + monkeypatch.setattr(server_mod, "Router", FakeRouter) + monkeypatch.setattr(server_mod, "UpstreamManager", FakeUpstreamManager) + app = _app(tmp_path) + + with TestClient(app) as client: + client.post("/v1/chats", json={"id": "chat-1", "title": "Plan"}) + client.patch( + "/v1/chats/chat-1", + json={"messages": [{"role": "user", "content": "hello"}]}, + ) + + export_resp = client.get("/v1/chats/export") + assert export_resp.status_code == 200 + payload = export_resp.json() + assert payload["version"] == 1 + assert len(payload["chats"]) == 1 + assert payload["chats"][0]["id"] == "chat-1" + + import_resp = client.post( + "/v1/chats/import", + json={"chats": payload["chats"], "overwrite": False}, + ) + assert import_resp.status_code == 200 + summary = import_resp.json() + assert summary["imported"] == 0 + assert summary["skipped"] == 1 + + payload["chats"][0]["id"] = "chat-2" + import_resp = client.post( + "/v1/chats/import", + json={"chats": payload["chats"], "overwrite": False}, + ) + assert import_resp.status_code == 200 + summary = import_resp.json() + assert summary["imported"] == 1 + assert client.get("/v1/chats/chat-2").status_code == 200