From da6ef50254b5b45f9bfbd1b85f6e6cda66ab84c0 Mon Sep 17 00:00:00 2001
From: readwrightexecute <me@austinwright.com>
Date: Sat, 23 May 2026 10:31:39 -0500
Subject: [PATCH] docs: document config.yaml

---
 README.md                           |  84 ++++-----
 docs/CONFIGURATION.md               | 212 +++++++++++++++++++++++
 internal/config/config_docs_test.go | 253 ++++++++++++++++++++++++++++
 3 files changed, 499 insertions(+), 50 deletions(-)
 create mode 100644 docs/CONFIGURATION.md
 create mode 100644 internal/config/config_docs_test.go

diff --git a/README.md b/README.md
index e5e5c8e..c586d57 100644
--- a/README.md
+++ b/README.md
@@ -275,17 +275,38 @@ for all 10 per-language benchmark deep dives.
 
 ## Configuration
 
-All configuration is via environment variables:
+Lumen supports persistent YAML configuration and environment variable overrides.
+For full details, see [docs/CONFIGURATION.md](docs/CONFIGURATION.md).
 
-| Variable                 | Default                  | Description                                                   |
-| ------------------------ | ------------------------ | ------------------------------------------------------------- |
+By default, Lumen reads YAML config from:
+
+- `$XDG_CONFIG_HOME/lumen/config.yaml`, or
+- `~/.config/lumen/config.yaml` when `XDG_CONFIG_HOME` is unset.
+
+A minimal Ollama config looks like this:
+
+```yaml
+servers:
+  - backend: ollama
+    host: http://localhost:11434
+    model: ordis/jina-embeddings-v2-base-code
+```
+
+Environment variables override YAML values and are useful for one-off changes.
+Server-specific variables only affect the first configured server (`servers[0]`).
+
+| Variable                 | Default                  | Description                                                      |
+| ------------------------ | ------------------------ | ---------------------------------------------------------------- |
 | `LUMEN_EMBED_MODEL`      | see note ¹               | Embedding model; use with `LUMEN_EMBED_DIMS` for unlisted models |
-| `LUMEN_BACKEND`          | `ollama`                 | Embedding backend (`ollama` or `lmstudio`)                    |
-| `OLLAMA_HOST`            | `http://localhost:11434` | Ollama server URL                                             |
-| `LM_STUDIO_HOST`         | `http://localhost:1234`  | LM Studio server URL                                          |
-| `LUMEN_MAX_CHUNK_TOKENS` | `512`                    | Max tokens per chunk before splitting                         |
-| `LUMEN_EMBED_DIMS`       | —                        | Override embedding dimensions (required for unlisted models)  |
-| `LUMEN_EMBED_CTX`        | `8192` (unlisted models) | Override context window length                                |
+| `LUMEN_BACKEND`          | `ollama`                 | Embedding backend (`ollama` or `lmstudio`)                       |
+| `OLLAMA_HOST`            | `http://localhost:11434` | Ollama server URL                                                |
+| `LM_STUDIO_HOST`         | `http://localhost:1234`  | LM Studio server URL                                             |
+| `LUMEN_MAX_CHUNK_TOKENS` | `512`                    | Max tokens per chunk before splitting                            |
+| `LUMEN_FRESHNESS_TTL`    | `60s`                    | How long a fresh index is trusted before rechecking              |
+| `LUMEN_REINDEX_TIMEOUT`  | `0s`                     | Reindex timeout; `0s` means no config-level timeout              |
+| `LUMEN_LOG_LEVEL`        | `info`                   | Logging verbosity                                                |
+| `LUMEN_EMBED_DIMS`       | —                        | Override embedding dimensions (required for unlisted models)     |
+| `LUMEN_EMBED_CTX`        | `8192` (unlisted models) | Override context window length                                   |
 
 ¹ `ordis/jina-embeddings-v2-base-code` (Ollama),
 `nomic-ai/nomic-embed-code-GGUF` (LM Studio)
@@ -303,6 +324,7 @@ Dimensions and context length are configured automatically per model:
 | `nomic-embed-text`                   | Ollama    | 768  | 8192    | Untested                                                              |
 | `qwen3-embedding:0.6b`               | Ollama    | 1024 | 32768   | Untested                                                              |
 | `all-minilm`                         | Ollama    | 384  | 512     | Untested                                                              |
+| `manutic/nomic-embed-code:7b`        | Ollama    | 3584 | 32768   | Untested                                                              |
 
 Switching models creates a separate index automatically. The model name is part
 of the database path hash, so different models never collide.
@@ -312,47 +334,9 @@ of the database path hash, so different models never collide.
 > Studio entry both named `foo`), they share the same index — use distinct
 > model names per backend to avoid collisions.
 
-### Selecting a server per invocation
-
-`lumen index` and `lumen search` accept `--model`/`-m` and `--backend`/`-b`
-to pick from a multi-server `config.yaml`. The selection filters the
-configured servers to those matching both fields; failover still works
-within the filtered subset.
-
-```sh
-# Index with the Ollama server matching this model name.
-lumen index --model ordis/jina-embeddings-v2-base-code .
-
-# Same model name hosted on LM Studio (present in YAML, not in the
-# static registry) — accepted because the name is configured.
-lumen index --model text-embedding-jina-embeddings-v2-base-code .
-
-# Disambiguate when the same model is configured on two backends.
-lumen index --model my-embed --backend lmstudio .
-
-# Pick the first configured Ollama server regardless of model.
-lumen search --backend ollama "…"
-```
-
-If `--model` is not configured in YAML but is a known registry model (and
-`--backend` is unset), Lumen falls back to mutating the default server's
-model — preserving `lumen index --model all-minilm .` for users with no YAML.
-
-### Using a custom or unlisted model
-
-If your model is not in the registry above, set `LUMEN_EMBED_DIMS` to bypass the
-registry check. `LUMEN_EMBED_CTX` is optional and defaults to `8192`.
-
-Both variables can also override values for _known_ models — useful when running
-a model variant with a longer context window or different output dimensions.
-
-```sh
-LUMEN_BACKEND=lmstudio
-LM_STUDIO_HOST=http://localhost:8801
-LUMEN_EMBED_MODEL=mlx-community/Qwen3-Embedding-8B-4bit-DWQ
-LUMEN_EMBED_DIMS=4096
-LUMEN_EMBED_CTX=40960   # optional, defaults to 8192
-```
+See [docs/CONFIGURATION.md](docs/CONFIGURATION.md) for multi-server setups,
+LM Studio examples, custom models, validation rules, environment-variable
+precedence, and CLI server selection.
 
 ## Controlling what gets indexed
 
diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
new file mode 100644
index 0000000..2f3ecd4
--- /dev/null
+++ b/docs/CONFIGURATION.md
@@ -0,0 +1,212 @@
+# Lumen Configuration
+
+Lumen can be configured with a YAML file and environment variables. The YAML
+file is the best choice for persistent multi-server setups; environment
+variables are useful for one-off overrides and backwards compatibility.
+
+## Config file location
+
+Lumen reads YAML config from:
+
+- `$XDG_CONFIG_HOME/lumen/config.yaml` when `XDG_CONFIG_HOME` is set
+- otherwise `~/.config/lumen/config.yaml`
+
+Where the underlying filesystem watcher supports it, the MCP server watches the
+config directory and reloads the configuration when `config.yaml` is written or
+created.
+
+## Precedence
+
+Configuration is applied in this order, with later layers overriding earlier
+layers:
+
+1. built-in defaults
+2. YAML config file
+3. environment variables
+4. command/programmatic model overrides
+5. `--model` / `--backend` server selection filters
+
+## Minimal config
+
+Ollama:
+
+```yaml
+servers:
+  - backend: ollama
+    host: http://localhost:11434
+    model: ordis/jina-embeddings-v2-base-code
+```
+
+LM Studio:
+
+```yaml
+servers:
+  - backend: lmstudio
+    host: http://localhost:1234
+    model: nomic-ai/nomic-embed-code-GGUF
+```
+
+## Full example
+
+```yaml
+log_level: info
+max_chunk_tokens: 512
+freshness_ttl: 60s
+reindex_timeout: 0s
+
+servers:
+  - backend: ollama
+    host: http://localhost:11434
+    model: ordis/jina-embeddings-v2-base-code
+    dims: 768
+    ctx_length: 8192
+    min_score: 0.35
+```
+
+## Top-level fields
+
+| Field | Type | Default | Description |
+| --- | --- | --- | --- |
+| `log_level` | string | `info` | Logging verbosity. |
+| `max_chunk_tokens` | integer | `512` | Maximum estimated tokens per chunk before splitting. |
+| `freshness_ttl` | duration | `60s` | How long a confirmed-fresh index is trusted before rechecking. |
+| `reindex_timeout` | duration | `0s` | Reindex timeout from config. `0s` means no config-level timeout; command/server code may still apply its own operational safeguards. |
+| `servers` | list | one default Ollama server | Embedding backend configurations. |
+
+Durations use Go duration syntax such as `30s`, `5m`, or `1h`.
+
+## Server fields
+
+| Field | Type | Required | Description |
+| --- | --- | --- | --- |
+| `backend` | string | yes | `ollama` or `lmstudio`. |
+| `host` | URL | yes | HTTP(S) base URL for the embedding backend. |
+| `model` | string | yes | Embedding model name. |
+| `dims` | integer | for unknown models | Embedding vector dimension. Optional for known models. |
+| `ctx_length` | integer | no | Embedding model context length. Optional for every model; known models supply a default. |
+| `min_score` | float | no | Default minimum cosine similarity threshold. |
+
+## Known embedding models
+
+Dimensions, context length, and default minimum score are configured
+automatically for known models:
+
+| Model | Backend | Dims | Context | Min score |
+| --- | --- | ---: | ---: | ---: |
+| `ordis/jina-embeddings-v2-base-code` | `ollama` | 768 | 8192 | 0.35 |
+| `nomic-embed-text` | `ollama` | 768 | 8192 | 0.30 |
+| `nomic-ai/nomic-embed-code-GGUF` | `lmstudio` | 3584 | 8192 | 0.15 |
+| `qwen3-embedding:8b` | `ollama` | 4096 | 40960 | 0.30 |
+| `qwen3-embedding:4b` | `ollama` | 2560 | 40960 | 0.30 |
+| `qwen3-embedding:0.6b` | `ollama` | 1024 | 32768 | 0.30 |
+| `all-minilm` | `ollama` | 384 | 512 | 0.20 |
+| `manutic/nomic-embed-code:7b` | `ollama` | 3584 | 32768 | 0.15 |
+
+`text-embedding-nomic-embed-code` is treated as an alias for
+`nomic-ai/nomic-embed-code-GGUF`.
+
+Switching models creates a separate index automatically because the model name
+is part of the database path hash. The backend is not part of that hash, so use
+distinct model names if the same model is served from multiple backends with
+incompatible embeddings.
+
+## Environment variable overrides
+
+Environment variables are applied after the YAML config file and before command
+or server-selection overrides.
+
+| Environment variable | Overrides |
+| --- | --- |
+| `LUMEN_MAX_CHUNK_TOKENS` | `max_chunk_tokens` |
+| `LUMEN_FRESHNESS_TTL` | `freshness_ttl` |
+| `LUMEN_REINDEX_TIMEOUT` | `reindex_timeout` |
+| `LUMEN_LOG_LEVEL` | `log_level` |
+| `LUMEN_BACKEND` | `servers[0].backend`; resets server 0 to backend defaults first |
+| `LUMEN_EMBED_MODEL` | `servers[0].model` |
+| `LUMEN_EMBED_DIMS` | `servers[0].dims` |
+| `LUMEN_EMBED_CTX` | `servers[0].ctx_length` |
+| `OLLAMA_HOST` | `servers[0].host` when server 0 backend is `ollama` |
+| `LM_STUDIO_HOST` | `servers[0].host` when server 0 backend is `lmstudio` |
+
+Server-level environment variables only modify `servers[0]`. They do not
+rewrite every server in a multi-server config.
+
+## Selecting a server with CLI flags
+
+`lumen index` and `lumen search` accept `--model` / `-m` and `--backend` / `-b`
+to select from the configured server list:
+
+```bash
+lumen index --model ordis/jina-embeddings-v2-base-code .
+lumen search --backend ollama "authentication flow"
+lumen index --model my-embed --backend lmstudio .
+```
+
+`--model` and `--backend` filter the configured server list. If multiple servers
+match, order is preserved for failover. If no servers match, Lumen returns a
+descriptive error that includes the configured `(backend, model)` pairs.
+
+If `--model` is not configured in YAML but is a known registry model and
+`--backend` is unset, Lumen falls back to overriding the default server's model.
+That preserves legacy commands such as:
+
+```bash
+lumen index --model all-minilm .
+```
+
+## Multi-server and failover examples
+
+```yaml
+servers:
+  - backend: ollama
+    host: http://localhost:11434
+    model: ordis/jina-embeddings-v2-base-code
+  - backend: ollama
+    host: http://backup-ollama.example:11434
+    model: ordis/jina-embeddings-v2-base-code
+  - backend: lmstudio
+    host: http://localhost:1234
+    model: nomic-ai/nomic-embed-code-GGUF
+```
+
+When more than one configured server matches the selected backend/model, Lumen
+keeps the configured order and can fail over within that filtered subset.
+
+## Unknown/custom models
+
+If a model is not in the known model table or alias map, set `dims` explicitly:
+
+```yaml
+servers:
+  - backend: ollama
+    host: http://localhost:11434
+    model: my-custom-embedding-model
+    dims: 1024
+    ctx_length: 8192
+    min_score: 0.20
+```
+
+`ctx_length` and `min_score` are optional for custom models. If `min_score` is
+omitted, Lumen derives a dimension-aware default from `dims`.
+
+## Validation errors
+
+Lumen validates configuration before using it. Common invalid configs include:
+
+- empty `servers`
+- missing `backend`
+- unknown `backend`
+- missing `host`
+- invalid `host` URL, or a URL that is not `http://` or `https://`
+- missing `model`
+- unknown model with no explicit `dims`
+
+## MCP/server behavior
+
+The stdio MCP server loads the same configuration and uses the same precedence
+rules. When watching is available, it watches the config directory and reloads
+when the configured `config.yaml` file is written or created.
+
+Agent hosts and plugin wrappers may add their own environment variables before
+starting Lumen. Prefer YAML for stable multi-server setups, and use environment
+variables for per-session overrides.
diff --git a/internal/config/config_docs_test.go b/internal/config/config_docs_test.go
new file mode 100644
index 0000000..d2e8cc8
--- /dev/null
+++ b/internal/config/config_docs_test.go
@@ -0,0 +1,253 @@
+package config
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+)
+
+func writeConfigDocFixture(t *testing.T, body string) string { // writes body to config.yaml in a fresh temp dir; returns the file path
+	t.Helper()
+	dir := t.TempDir() // per-test directory supplied by the testing package
+	path := filepath.Join(dir, "config.yaml")
+	if err := os.WriteFile(path, []byte(body), 0o644); err != nil {
+		t.Fatalf("write config fixture: %v", err) // a write failure aborts the calling test
+	}
+	return path
+}
+
+func clearConfigDocEnv(t *testing.T) { // blanks each env var listed below for the calling test
+	t.Helper()
+	for _, key := range []string{
+		"LUMEN_BACKEND",
+		"LUMEN_EMBED_MODEL",
+		"LUMEN_EMBED_DIMS",
+		"LUMEN_EMBED_CTX",
+		"OLLAMA_HOST",
+		"LM_STUDIO_HOST",
+		"LUMEN_MAX_CHUNK_TOKENS",
+		"LUMEN_FRESHNESS_TTL",
+		"LUMEN_REINDEX_TIMEOUT",
+		"LUMEN_LOG_LEVEL",
+	} {
+		t.Setenv(key, "") // NOTE(review): sets "" rather than unsetting — confirm the loader treats empty as absent
+	}
+}
+
+func TestDocumentationMinimalOllamaConfig(t *testing.T) { // the minimal Ollama YAML must load and resolve dims, ctx length and min score
+	clearConfigDocEnv(t)
+	path := writeConfigDocFixture(t, `
+servers:
+  - backend: ollama
+    host: http://localhost:11434
+    model: ordis/jina-embeddings-v2-base-code
+`)
+	// The YAML sets no dims, ctx_length or min_score, so the values asserted below come from outside the file (presumably per-model defaults — verify).
+	svc, err := NewConfigService(path)
+	if err != nil {
+		t.Fatalf("NewConfigService: %v", err)
+	}
+	// One YAML entry, so exactly one server is expected.
+	servers := svc.Servers()
+	if len(servers) != 1 {
+		t.Fatalf("Servers() len = %d, want 1", len(servers))
+	}
+	if servers[0].Backend != BackendOllama {
+		t.Fatalf("Backend = %q, want %q", servers[0].Backend, BackendOllama)
+	}
+	if got := svc.ServerDims(0); got != 768 {
+		t.Fatalf("ServerDims(0) = %d, want 768", got)
+	}
+	if got := svc.ServerCtxLength(0); got != 8192 {
+		t.Fatalf("ServerCtxLength(0) = %d, want 8192", got)
+	}
+	if got := svc.ServerMinScore(0); got != 0.35 {
+		t.Fatalf("ServerMinScore(0) = %f, want 0.35", got)
+	}
+}
+
+func TestDocumentationMinimalLMStudioConfig(t *testing.T) { // the minimal LM Studio YAML must load and resolve dims, ctx length and min score
+	clearConfigDocEnv(t)
+	path := writeConfigDocFixture(t, `
+servers:
+  - backend: lmstudio
+    host: http://localhost:1234
+    model: nomic-ai/nomic-embed-code-GGUF
+`)
+	// The YAML sets no dims, ctx_length or min_score, so the values asserted below come from outside the file (presumably per-model defaults — verify).
+	svc, err := NewConfigService(path)
+	if err != nil {
+		t.Fatalf("NewConfigService: %v", err)
+	}
+	// One YAML entry, so exactly one server is expected.
+	servers := svc.Servers()
+	if len(servers) != 1 {
+		t.Fatalf("Servers() len = %d, want 1", len(servers))
+	}
+	if servers[0].Backend != BackendLMStudio {
+		t.Fatalf("Backend = %q, want %q", servers[0].Backend, BackendLMStudio)
+	}
+	if got := svc.ServerDims(0); got != 3584 {
+		t.Fatalf("ServerDims(0) = %d, want 3584", got)
+	}
+	if got := svc.ServerCtxLength(0); got != 8192 {
+		t.Fatalf("ServerCtxLength(0) = %d, want 8192", got)
+	}
+	if got := svc.ServerMinScore(0); got != 0.15 {
+		t.Fatalf("ServerMinScore(0) = %f, want 0.15", got)
+	}
+}
+
+func TestDocumentationFullConfigExample(t *testing.T) { // the full example YAML must load and expose its top-level values
+	clearConfigDocEnv(t)
+	path := writeConfigDocFixture(t, `
+log_level: info
+max_chunk_tokens: 512
+freshness_ttl: 60s
+reindex_timeout: 0s
+
+servers:
+  - backend: ollama
+    host: http://localhost:11434
+    model: ordis/jina-embeddings-v2-base-code
+    dims: 768
+    ctx_length: 8192
+    min_score: 0.35
+`)
+	// Only the four top-level accessors are asserted below; the server fields in the YAML are not checked here.
+	svc, err := NewConfigService(path)
+	if err != nil {
+		t.Fatalf("NewConfigService: %v", err)
+	}
+	// Each accessor should return the value written in the YAML above.
+	if got := svc.LogLevel(); got != "info" {
+		t.Fatalf("LogLevel() = %q, want info", got)
+	}
+	if got := svc.MaxChunkTokens(); got != 512 {
+		t.Fatalf("MaxChunkTokens() = %d, want 512", got)
+	}
+	if got := svc.FreshnessTTL(); got != 60*time.Second {
+		t.Fatalf("FreshnessTTL() = %v, want 60s", got)
+	}
+	if got := svc.ReindexTimeout(); got != 0 {
+		t.Fatalf("ReindexTimeout() = %v, want 0", got)
+	}
+}
+
+func TestDocumentationMultiServerFailoverExample(t *testing.T) { // server selection must keep only the matching servers, in YAML order
+	clearConfigDocEnv(t)
+	path := writeConfigDocFixture(t, `
+servers:
+  - backend: ollama
+    host: http://localhost:11434
+    model: ordis/jina-embeddings-v2-base-code
+  - backend: ollama
+    host: http://backup-ollama.example:11434
+    model: ordis/jina-embeddings-v2-base-code
+  - backend: lmstudio
+    host: http://localhost:1234
+    model: nomic-ai/nomic-embed-code-GGUF
+`)
+	// Selecting the jina model on ollama should drop the lmstudio entry and keep the first two.
+	svc, err := NewConfigService(path, WithServerSelection("ordis/jina-embeddings-v2-base-code", "ollama"))
+	if err != nil {
+		t.Fatalf("NewConfigService: %v", err)
+	}
+	// The two remaining servers must appear in the same order as in the YAML.
+	servers := svc.Servers()
+	if len(servers) != 2 {
+		t.Fatalf("Servers() len = %d, want 2", len(servers))
+	}
+	if got := servers[0].Host; got != "http://localhost:11434" {
+		t.Fatalf("Servers()[0].Host = %q, want http://localhost:11434", got)
+	}
+	if got := servers[1].Host; got != "http://backup-ollama.example:11434" {
+		t.Fatalf("Servers()[1].Host = %q, want http://backup-ollama.example:11434", got)
+	}
+}
+
+func TestDocumentationCustomModelRequiresDims(t *testing.T) { // NOTE(review): only the dims-present path is checked; nothing asserts that omitting dims fails
+	clearConfigDocEnv(t)
+	path := writeConfigDocFixture(t, `
+servers:
+  - backend: ollama
+    host: http://localhost:11434
+    model: my-custom-embedding-model
+    dims: 1024
+    ctx_length: 8192
+    min_score: 0.20
+`)
+	// Loading is expected to succeed with dims given explicitly, and ServerDims(0) must echo that value.
+	svc, err := NewConfigService(path)
+	if err != nil {
+		t.Fatalf("NewConfigService: %v", err)
+	}
+	if got := svc.ServerDims(0); got != 1024 {
+		t.Fatalf("ServerDims(0) = %d, want 1024", got)
+	}
+}
+
+func TestDocumentationEnvOverridesYAMLFirstServer(t *testing.T) { // env values must win over the YAML for max_chunk_tokens and the first server's host
+	clearConfigDocEnv(t)
+	path := writeConfigDocFixture(t, `
+max_chunk_tokens: 512
+servers:
+  - backend: ollama
+    host: http://localhost:11434
+    model: ordis/jina-embeddings-v2-base-code
+`)
+	// Both overrides are set after clearConfigDocEnv and differ from the YAML values above.
+	t.Setenv("LUMEN_MAX_CHUNK_TOKENS", "2048")
+	t.Setenv("OLLAMA_HOST", "http://ollama.example:11434")
+	// The service is constructed only after the environment is in place.
+	svc, err := NewConfigService(path)
+	if err != nil {
+		t.Fatalf("NewConfigService: %v", err)
+	}
+	if got := svc.MaxChunkTokens(); got != 2048 {
+		t.Fatalf("MaxChunkTokens() = %d, want 2048", got)
+	}
+	if got := svc.Servers()[0].Host; got != "http://ollama.example:11434" { // indexes without a length check; panics if Servers() is empty
+		t.Fatalf("Servers()[0].Host = %q, want http://ollama.example:11434", got)
+	}
+}
+
+func repoRootFromConfigPackage(t *testing.T) string { // returns the cleaned path two directories above the working directory
+	t.Helper()
+	wd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("get cwd: %v", err)
+	}
+	return filepath.Clean(filepath.Join(wd, "..", "..")) // assumes cwd is internal/config, where go test runs this package's tests
+}
+
+func TestConfigurationDocumentationIsLinkedFromREADME(t *testing.T) { // README must mention the config doc, and the doc must contain key strings
+	root := repoRootFromConfigPackage(t)
+	// The README check is a plain substring match on the doc path; link syntax is not verified.
+	readme, err := os.ReadFile(filepath.Join(root, "README.md"))
+	if err != nil {
+		t.Fatalf("read README.md: %v", err)
+	}
+	if !strings.Contains(string(readme), "docs/CONFIGURATION.md") {
+		t.Fatalf("README.md must link to docs/CONFIGURATION.md")
+	}
+	// The doc must contain each literal string below; the first missing one aborts the test.
+	doc, err := os.ReadFile(filepath.Join(root, "docs", "CONFIGURATION.md"))
+	if err != nil {
+		t.Fatalf("read docs/CONFIGURATION.md: %v", err)
+	}
+	for _, want := range []string{
+		"~/.config/lumen/config.yaml",
+		"$XDG_CONFIG_HOME/lumen/config.yaml",
+		"servers:",
+		"LUMEN_EMBED_MODEL",
+		"OLLAMA_HOST",
+		"LM_STUDIO_HOST",
+	} {
+		if !strings.Contains(string(doc), want) {
+			t.Fatalf("docs/CONFIGURATION.md missing %q", want)
+		}
+	}
+}