diff --git a/README.md b/README.md
index 0ece9978a..c2c9bad1b 100644
--- a/README.md
+++ b/README.md
@@ -323,6 +323,7 @@ These are only needed for **headless / CI extraction** (`graphify extract`). Whe
 | `GEMINI_API_KEY` or `GOOGLE_API_KEY` | Google Gemini backend | `--backend gemini` |
 | `OPENAI_API_KEY` | OpenAI or OpenAI-compatible APIs | `--backend openai` |
 | `DEEPSEEK_API_KEY` | DeepSeek backend | `--backend deepseek` |
+| `OPENROUTER_API_KEY` | OpenRouter DeepSeek / Kimi backends | `--backend openrouter-deepseek` (auto-selected default when this key is set) or `--backend openrouter-kimi` |
 | `MOONSHOT_API_KEY` | Kimi Code backend | `--backend kimi` |
 | `OLLAMA_BASE_URL` | Ollama local inference URL | `--backend ollama` (default: `http://localhost:11434`) |
 | `OLLAMA_MODEL` | Ollama model name | `--backend ollama` (default: auto-detect) |
@@ -343,7 +344,7 @@ These are only needed for **headless / CI extraction** (`graphify extract`). Whe
 
 - **Code files** — processed locally via tree-sitter. Nothing leaves your machine.
 - **Video / audio** — transcribed locally with faster-whisper. Nothing leaves your machine.
-- **Docs, PDFs, images** — sent to your AI assistant for semantic extraction (via the `/graphify` skill, using whatever model your IDE session runs). Headless `graphify extract` requires `GEMINI_API_KEY` / `GOOGLE_API_KEY` (Gemini), `MOONSHOT_API_KEY` (Kimi), `ANTHROPIC_API_KEY` (Claude), `OPENAI_API_KEY` (OpenAI), `DEEPSEEK_API_KEY` (DeepSeek), a running Ollama instance (`OLLAMA_BASE_URL`), AWS credentials via the standard provider chain (Bedrock - no API key needed, uses IAM), or the `claude` CLI binary (Claude Code - no API key needed, uses your Claude subscription). The `--dedup-llm` flag uses the same key.
+- **Docs, PDFs, images** — sent to your AI assistant for semantic extraction (via the `/graphify` skill, using whatever model your IDE session runs). Headless `graphify extract` defaults to OpenRouter DeepSeek when `OPENROUTER_API_KEY` is set (set `GRAPHIFY_DEFAULT_BACKEND` to pin a different configured backend). It can also use `DEEPSEEK_API_KEY` (DeepSeek), `GEMINI_API_KEY` / `GOOGLE_API_KEY` (Gemini), `MOONSHOT_API_KEY` (Kimi), `ANTHROPIC_API_KEY` (Claude), `OPENAI_API_KEY` (OpenAI), a running Ollama instance (`OLLAMA_BASE_URL`), AWS credentials via the standard provider chain (Bedrock - no API key needed, uses IAM), or the `claude` CLI binary (Claude Code - no API key needed, uses your Claude subscription). The `--dedup-llm` flag uses the same key.
 - No telemetry, no usage tracking, no analytics.
 
 ---
@@ -453,7 +454,8 @@ graphify kiro install / uninstall
 graphify antigravity install / uninstall
 
 graphify extract ./docs                        # headless LLM extraction for CI (no IDE needed)
-graphify extract ./docs --backend gemini       # explicit backend: gemini, kimi, claude, openai, deepseek, ollama, bedrock, or claude-cli
+graphify extract ./docs --backend openrouter-deepseek  # explicit OpenRouter DeepSeek backend (default when OPENROUTER_API_KEY is set)
+graphify extract ./docs --backend gemini       # explicit backend: openrouter-deepseek, openrouter-kimi, deepseek, gemini, kimi, claude, openai, ollama, bedrock, or claude-cli
 graphify extract ./docs --backend gemini --model gemini-3.1-pro-preview
 graphify extract ./docs --backend ollama       # local Ollama (set OLLAMA_BASE_URL / OLLAMA_MODEL) - no API key needed for loopback
 GRAPHIFY_OLLAMA_NUM_CTX=32768 graphify extract ./docs --backend ollama   # override KV-cache window (auto-sized by default)
@@ -470,6 +472,7 @@ graphify extract ./docs --force                # overwrite graph.json even if ne
 graphify extract ./docs --dedup-llm            # LLM tiebreaker for ambiguous entity pairs (uses same API key)
 graphify extract ./docs --global --as myrepo   # extract and register into the cross-project global graph
 GRAPHIFY_MAX_OUTPUT_TOKENS=32768 graphify extract ./docs --backend claude  # raise output cap for dense corpora
+graphify quality graphify-out/graph.json       # schema-quality gate for generated graph JSON
 
 graphify export callflow-html                       # graphify-out/<project>-callflow.html
 graphify export callflow-html --max-sections 8      # cap generated architecture sections
diff --git a/graphify/__main__.py b/graphify/__main__.py
index 895f626ef..4e5ddcab4 100644
--- a/graphify/__main__.py
+++ b/graphify/__main__.py
@@ -1241,6 +1241,8 @@ def main() -> None:
         print("    --context C             explicit edge-context filter (repeatable)")
         print("    --budget N              cap output at N tokens (default 2000)")
         print("    --graph <path>          path to graph.json (default graphify-out/graph.json)")
+        print("  quality [graph.json]     inspect graph.json schema quality")
+        print("    --json                  emit machine-readable quality report")
         print("  save-result             save a Q&A result to graphify-out/memory/ for graph feedback loop")
         print("    --question Q            the question asked")
         print("    --answer A              the answer to save")
@@ -1256,7 +1258,7 @@ def main() -> None:
         print("    --top-k-edges N         per-symbol outbound edges in inspector (default 12)")
         print("    --label NAME            project label in header")
         print("  extract <path>          headless full extraction (AST + semantic LLM) for CI/scripts")
-        print("    --backend B             gemini|kimi|claude|openai|deepseek|ollama (default: whichever API key is set)")
+        print("    --backend B             openrouter-deepseek|openrouter-kimi|deepseek|gemini|kimi|claude|openai|ollama (default: OpenRouter DeepSeek when OPENROUTER_API_KEY is set)")
         print("    --model M               override backend default model")
         print("    --max-workers N         AST extraction subprocess count (default: cpu_count)")
         print("    --token-budget N        per-chunk token cap for semantic extraction (default: 60000)")
@@ -1576,6 +1578,26 @@ def main() -> None:
             source_nodes=opts.nodes or None,
         )
         print(f"Saved to {out}")
+    elif cmd == "quality":
+        from graphify.quality import format_report, inspect_graph
+        graph_path = Path(_default_graph_path())
+        emit_json = False
+        for arg in sys.argv[2:]:
+            if arg == "--json":
+                emit_json = True
+            else:
+                graph_path = Path(arg)
+        try:
+            report = inspect_graph(graph_path)
+        except Exception as exc:
+            print(f"error: could not inspect graph quality: {exc}", file=sys.stderr)
+            sys.exit(1)
+        if emit_json:
+            print(json.dumps(report, indent=2))
+        else:
+            print(format_report(report))
+        if report["status"] != "pass":
+            sys.exit(1)
     elif cmd == "path":
         if len(sys.argv) < 4:
             print("Usage: graphify path \"<source>\" \"<target>\" [--graph path]", file=sys.stderr)
@@ -2402,7 +2424,7 @@ def _load_graph(p: str):
         # has an API key set.
         if len(sys.argv) < 3:
             print(
-                "Usage: graphify extract <path> [--backend gemini|kimi|claude|openai|deepseek|ollama] "
+                "Usage: graphify extract <path> [--backend openrouter-deepseek|openrouter-kimi|deepseek|gemini|kimi|claude|openai|ollama] "
                 "[--model M] [--out DIR] [--google-workspace] [--no-cluster] "
                 "[--max-workers N] [--token-budget N] [--max-concurrency N] "
                 "[--api-timeout S]",
@@ -2525,14 +2547,16 @@ def _parse_float(name: str, raw: str) -> float:
             extract_corpus_parallel as _extract_corpus_parallel,
             _format_backend_env_keys,
             _get_backend_api_key,
+            _sanitize_extraction_result,
         )
         if backend is None:
             backend = _detect_backend()
             if backend is None:
                 print(
-                    "error: no LLM API key found. Set GEMINI_API_KEY or GOOGLE_API_KEY "
-                    "(gemini), MOONSHOT_API_KEY (kimi), ANTHROPIC_API_KEY (claude), "
-                    "OPENAI_API_KEY (openai), DEEPSEEK_API_KEY (deepseek), "
+                    "error: no LLM API key found. Set OPENROUTER_API_KEY "
+                    "(default openrouter-deepseek), DEEPSEEK_API_KEY (deepseek), "
+                    "GEMINI_API_KEY or GOOGLE_API_KEY (gemini), MOONSHOT_API_KEY (kimi), "
+                    "ANTHROPIC_API_KEY (claude), OPENAI_API_KEY (openai), "
                     "or pass --backend.",
                     file=sys.stderr,
                 )
@@ -2677,11 +2701,16 @@ def _parse_float(name: str, raw: str) -> float:
             cached_nodes, cached_edges, cached_hyperedges, uncached_paths = (
                 _check_semantic_cache(sem_paths_str, root=target)
             )
+            cached_fragment = _sanitize_extraction_result({
+                "nodes": cached_nodes,
+                "edges": cached_edges,
+                "hyperedges": cached_hyperedges,
+            })
             sem_cache_hits = len(semantic_files) - len(uncached_paths)
             sem_cache_misses = len(uncached_paths)
-            sem_result["nodes"].extend(cached_nodes)
-            sem_result["edges"].extend(cached_edges)
-            sem_result["hyperedges"].extend(cached_hyperedges)
+            sem_result["nodes"].extend(cached_fragment["nodes"])
+            sem_result["edges"].extend(cached_fragment["edges"])
+            sem_result["hyperedges"].extend(cached_fragment["hyperedges"])
             if sem_cache_hits:
                 print(f"[graphify extract] semantic cache: {sem_cache_hits} hit / {sem_cache_misses} miss")
 
@@ -2722,6 +2751,7 @@ def _progress(idx: int, total: int, _result: dict) -> None:
                         file=sys.stderr,
                     )
                     fresh = {"nodes": [], "edges": [], "hyperedges": [], "input_tokens": 0, "output_tokens": 0}
+                fresh = _sanitize_extraction_result(fresh)
                 try:
                     _save_semantic_cache(
                         fresh.get("nodes", []),
@@ -2748,6 +2778,7 @@ def _progress(idx: int, total: int, _result: dict) -> None:
             "input_tokens": ast_result.get("input_tokens", 0) + sem_result.get("input_tokens", 0),
             "output_tokens": ast_result.get("output_tokens", 0) + sem_result.get("output_tokens", 0),
         }
+        merged = _sanitize_extraction_result(merged)
 
         graph_json_path = graphify_out / "graph.json"
         analysis_path = graphify_out / ".graphify_analysis.json"
diff --git a/graphify/build.py b/graphify/build.py
index cc229fdaa..9b0025b40 100644
--- a/graphify/build.py
+++ b/graphify/build.py
@@ -83,6 +83,18 @@ def _norm_source_file(p: str | None, root: str | None = None) -> str | None:
     return p
 
 
+def _dict_items(value: object) -> list[dict]:
+    """Keep only the dict members of a graph list; non-list input gives []."""
+    if isinstance(value, list):
+        return [entry for entry in value if isinstance(entry, dict)]
+    return []
+
+
+def _label_from_id(node_id: str) -> str:
+    """Build a title-cased, space-separated fallback label from a node id."""
+    return " ".join(filter(None, str(node_id).replace("-", "_").split("_"))).title()
+
+
 def edge_data(G: nx.Graph, u: str, v: str) -> dict:
     """Return one edge attribute dict for (u, v), tolerating MultiGraph.
 
@@ -112,15 +124,17 @@ def build_from_json(extraction: dict, *, directed: bool = False, root: str | Pat
     root: if given, absolute source_file paths from semantic subagents are made
         relative to root so all nodes share a consistent path key (#932).
     """
+    extraction = dict(extraction)
     _root = str(Path(root).resolve()) if root else None
     # NetworkX <= 3.1 serialised edges as "links"; remap to "edges" for compatibility.
     if "edges" not in extraction and "links" in extraction:
-        extraction = dict(extraction, edges=extraction["links"])
+        extraction["edges"] = extraction["links"]
+
+    for key in ("nodes", "edges", "hyperedges"):
+        extraction[key] = _dict_items(extraction.get(key))
 
     # Canonicalize legacy node/edge schema before validation.
     for node in extraction.get("nodes", []):
-        if not isinstance(node, dict):
-            continue
         if "source" in node and "source_file" not in node:
             # Count edges that reference this node so the warning is actionable (#479)
             node_id = node.get("id", "?")
@@ -135,6 +149,8 @@ def build_from_json(extraction: dict, *, directed: bool = False, root: str | Pat
                 file=sys.stderr,
             )
             node["source_file"] = node.pop("source")
+        if not node.get("label"):
+            node["label"] = _label_from_id(node.get("id", ""))
         # Default missing/None file_type to "concept" so legacy graph.json
         # entries (and stub nodes preserved by `_rebuild_code` from older
         # graphify versions that didn't always populate file_type) don't
@@ -145,6 +161,20 @@ def build_from_json(extraction: dict, *, directed: bool = False, root: str | Pat
         if ft and ft not in {"code", "document", "paper", "image", "rationale", "concept"}:
             node["file_type"] = _FILE_TYPE_SYNONYMS.get(ft, "concept")
 
+    for edge in extraction.get("edges", []):
+        if "confience_score" in edge:
+            typo_score = edge.pop("confience_score")
+            if "confidence_score" not in edge:
+                edge["confidence_score"] = typo_score
+        if "source" not in edge and "from" in edge:
+            edge["source"] = edge["from"]
+        if "target" not in edge and "to" in edge:
+            edge["target"] = edge["to"]
+        if not edge.get("relation"):
+            edge["relation"] = "conceptually_related_to"
+        if not edge.get("source_file"):
+            edge["source_file"] = "unknown"
+
     errors = validate_extraction(extraction)
     # Dangling edges (stdlib/external imports) are expected - only warn about real schema errors.
     real_errors = [e for e in errors if "does not match any node id" not in e]
@@ -161,10 +191,6 @@ def build_from_json(extraction: dict, *, directed: bool = False, root: str | Pat
     # e.g. "Session_ValidateToken" maps to "session_validatetoken".
     norm_to_id: dict[str, str] = {_normalize_id(nid): nid for nid in node_set}
     for edge in extraction.get("edges", []):
-        if "source" not in edge and "from" in edge:
-            edge["source"] = edge["from"]
-        if "target" not in edge and "to" in edge:
-            edge["target"] = edge["to"]
         if "source" not in edge or "target" not in edge:
             continue
         src, tgt = edge["source"], edge["target"]
@@ -214,9 +240,9 @@ def build(
     from graphify.dedup import deduplicate_entities
     combined: dict = {"nodes": [], "edges": [], "hyperedges": [], "input_tokens": 0, "output_tokens": 0}
     for ext in extractions:
-        combined["nodes"].extend(ext.get("nodes", []))
-        combined["edges"].extend(ext.get("edges", []))
-        combined["hyperedges"].extend(ext.get("hyperedges", []))
+        combined["nodes"].extend(_dict_items(ext.get("nodes", [])))
+        combined["edges"].extend(_dict_items(ext.get("edges", [])))
+        combined["hyperedges"].extend(_dict_items(ext.get("hyperedges", [])))
         combined["input_tokens"] += ext.get("input_tokens", 0)
         combined["output_tokens"] += ext.get("output_tokens", 0)
     if dedup and combined["nodes"]:
diff --git a/graphify/cache.py b/graphify/cache.py
index 2052cf7aa..11813f131 100644
--- a/graphify/cache.py
+++ b/graphify/cache.py
@@ -14,6 +14,13 @@
 _GRAPHIFY_OUT = os.environ.get("GRAPHIFY_OUT", "graphify-out")
 
 
+def _dict_items(value: object) -> list[dict]:
+    """Filter a cached/extracted graph list down to its dict entries."""
+    # Anything that is not a list (None, str, dict, ...) is treated as empty.
+    entries = value if isinstance(value, list) else []
+    return [entry for entry in entries if isinstance(entry, dict)]
+
+
 def _body_content(content: bytes) -> bytes:
     """Strip YAML frontmatter from Markdown content, returning only the body."""
     text = content.decode(errors="replace")
@@ -280,9 +287,9 @@ def check_semantic_cache(
             p = Path(root) / p
         result = load_cached(p, root, kind="semantic")
         if result is not None:
-            cached_nodes.extend(result.get("nodes", []))
-            cached_edges.extend(result.get("edges", []))
-            cached_hyperedges.extend(result.get("hyperedges", []))
+            cached_nodes.extend(_dict_items(result.get("nodes", [])))
+            cached_edges.extend(_dict_items(result.get("edges", [])))
+            cached_hyperedges.extend(_dict_items(result.get("hyperedges", [])))
         else:
             uncached.append(fpath)
 
@@ -306,14 +313,20 @@ def save_semantic_cache(
 
     by_file: dict[str, dict] = defaultdict(lambda: {"nodes": [], "edges": [], "hyperedges": []})
     for n in nodes:
+        if not isinstance(n, dict):
+            continue
         src = n.get("source_file", "")
         if src:
             by_file[src]["nodes"].append(n)
     for e in edges:
+        if not isinstance(e, dict):
+            continue
         src = e.get("source_file", "")
         if src:
             by_file[src]["edges"].append(e)
     for h in (hyperedges or []):
+        if not isinstance(h, dict):
+            continue
         src = h.get("source_file", "")
         if src:
             by_file[src]["hyperedges"].append(h)
diff --git a/graphify/export.py b/graphify/export.py
index a71c927c7..47129893c 100644
--- a/graphify/export.py
+++ b/graphify/export.py
@@ -472,6 +472,11 @@ def _git_head() -> str | None:
         return None
 
 
+def _label_from_id(node_id: str) -> str:
+    """Turn a node id into a title-cased label, splitting on "_" and "-"."""
+    return " ".join(word for word in str(node_id).translate({ord("-"): "_"}).split("_") if word).title()
+
+
 def to_json(G: nx.Graph, communities: dict[int, list[str]], output_path: str, *, force: bool = False, built_at_commit: str | None = None) -> bool:
     # Safety check: refuse to silently shrink an existing graph (#479)
     existing_path = Path(output_path)
@@ -499,9 +504,21 @@ def to_json(G: nx.Graph, communities: dict[int, list[str]], output_path: str, *,
     except TypeError:
         data = json_graph.node_link_data(G)
     for node in data["nodes"]:
+        if not node.get("label"):
+            node["label"] = _label_from_id(node.get("id", ""))
+        if not node.get("source_file"):
+            node["source_file"] = "unknown"
         node["community"] = node_community.get(node["id"])
         node["norm_label"] = _strip_diacritics(node.get("label", "")).lower()
     for link in data["links"]:
+        if "confience_score" in link:
+            typo_score = link.pop("confience_score")
+            if "confidence_score" not in link:
+                link["confidence_score"] = typo_score
+        if not link.get("relation"):
+            link["relation"] = "conceptually_related_to"
+        if not link.get("source_file"):
+            link["source_file"] = "unknown"
         if "confidence_score" not in link:
             conf = link.get("confidence", "EXTRACTED")
             link["confidence_score"] = _CONFIDENCE_SCORE_DEFAULTS.get(conf, 1.0)
diff --git a/graphify/llm.py b/graphify/llm.py
index 58786f681..b3395cb2d 100644
--- a/graphify/llm.py
+++ b/graphify/llm.py
@@ -1,5 +1,5 @@
 # Direct LLM backend for semantic extraction — supports Claude, Kimi K2.6,
-# Gemini, and OpenAI.
+# Gemini, OpenAI, DeepSeek, OpenRouter, Ollama, and Bedrock.
 # Used by `graphify extract . --backend gemini` and the benchmark scripts.
 # The default graphify pipeline uses Claude Code subagents via skill.md;
 # this module provides a direct API path for non-Claude-Code environments.
@@ -7,6 +7,7 @@
 
 import json
 import os
+import shutil
 import sys
 import time
 from collections.abc import Callable
@@ -98,6 +99,24 @@ def _get_tokenizer():
         "temperature": 0,
         "max_tokens": 16384,
     },
+    "openrouter-deepseek": {
+        "base_url": "https://openrouter.ai/api/v1",
+        "default_model": "deepseek/deepseek-v4-flash",
+        "env_key": "OPENROUTER_API_KEY",
+        "model_env_key": "GRAPHIFY_OPENROUTER_DEEPSEEK_MODEL",
+        "pricing": {"input": 0.14, "output": 0.28},  # placeholder; OpenRouter billing is authoritative
+        "temperature": 0,
+        "max_tokens": 16384,
+    },
+    "openrouter-kimi": {
+        "base_url": "https://openrouter.ai/api/v1",
+        "default_model": "moonshotai/kimi-k2.6",
+        "env_key": "OPENROUTER_API_KEY",
+        "model_env_key": "GRAPHIFY_OPENROUTER_KIMI_MODEL",
+        "pricing": {"input": 0.74, "output": 4.66},  # placeholder; OpenRouter billing is authoritative
+        "temperature": 0,
+        "max_tokens": 8192,
+    },
     "bedrock": {
         "default_model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
         "model_env_key": "GRAPHIFY_BEDROCK_MODEL",
@@ -138,6 +157,13 @@ def _resolve_max_tokens(default: int) -> int:
 - EXTRACTED: relationship explicit in source (import, call, citation, reference)
 - INFERRED: reasonable inference (shared data structure, implied dependency)
 - AMBIGUOUS: uncertain — flag for review, do not omit
+- Prefer fewer, better nodes over exhaustive extraction.
+- Extract at most 12 nodes per input file and at most 24 edges per input file.
+- Omit generic low-value concepts unless they connect two specific source artifacts.
+- Keep labels short. Keep relation values from the schema list only.
+- Every item inside nodes, edges, and hyperedges must be a JSON object. Never emit strings in those arrays.
+- Use the field name source_file for provenance. Do not use source except as an edge endpoint.
+- Use confidence_score exactly. Do not misspell it.
 
 Node ID format: lowercase, only [a-z0-9_], no dots or slashes.
 Format: {stem}_{entity} where stem = filename without extension, entity = symbol name (both normalised).
@@ -166,6 +192,42 @@ def _read_files(paths: list[Path], root: Path) -> str:
 _LLM_JSON_MAX_BYTES = 10 * 1024 * 1024  # 10 MB hard cap before json.loads (F-016)
 
 
+def _merge_extra_body(kwargs: dict, extra_body: dict) -> None:
+    """Merge provider-specific values into ``kwargs["extra_body"]`` in place.
+
+    Keys in ``extra_body`` override existing ones; a missing or non-dict
+    current value is replaced. A fresh dict is always stored, so neither the
+    previous value nor the caller's ``extra_body`` is aliased or mutated.
+    """
+    current = kwargs.get("extra_body")
+    kwargs["extra_body"] = {**(current if isinstance(current, dict) else {}), **extra_body}
+
+
+def _strip_json_fences(raw: str) -> str:
+    """Unwrap a leading markdown code fence (optionally tagged ``json``)."""
+    text = raw.strip()
+    if not text.startswith("```"):
+        return text
+    body = text.split("```", 2)[1]
+    if body.lstrip().startswith("json"):
+        body = body.lstrip()[4:]
+    return body.rsplit("```", 1)[0].strip()
+
+
+def _remove_json_control_chars(raw: str) -> str:
+    """Strip ASCII control characters (below 0x20) except tab, LF and CR."""
+    return "".join(filter(lambda ch: ch >= " " or ch in "\t\n\r", raw))
+
+
+def _extract_json_object(raw: str) -> str:
+    """Slice from the first "{" to the last "}"; return raw unchanged if absent."""
+    first, last = raw.find("{"), raw.rfind("}")
+    # A missing "}" gives last == -1, which the ordering check already rejects.
+    if first == -1 or last <= first:
+        return raw
+    return raw[first:last + 1]
+
+
 def _parse_llm_json(raw: str) -> dict:
     """Strip optional markdown fences and parse JSON. Returns empty fragment on failure.
 
@@ -179,17 +241,50 @@ def _parse_llm_json(raw: str) -> dict:
             file=sys.stderr,
         )
         return {"nodes": [], "edges": [], "hyperedges": []}
-    if raw.startswith("```"):
-        raw = raw.split("```", 2)[1]
-        if raw.startswith("json"):
-            raw = raw[4:]
-        raw = raw.rsplit("```", 1)[0]
-    try:
-        return json.loads(raw.strip())
-    except json.JSONDecodeError as exc:
-        print(f"[graphify] LLM returned invalid JSON, skipping chunk: {exc}", file=sys.stderr)
+    candidates = [
+        raw,
+        _strip_json_fences(raw),
+        _extract_json_object(_strip_json_fences(raw)),
+        _remove_json_control_chars(_extract_json_object(_strip_json_fences(raw))),
+    ]
+    last_exc: json.JSONDecodeError | None = None
+    for candidate in candidates:
+        try:
+            return json.loads(candidate.strip())
+        except json.JSONDecodeError as exc:
+            last_exc = exc
+    if last_exc is not None:
+        print(f"[graphify] LLM returned invalid JSON, skipping chunk: {last_exc}", file=sys.stderr)
+    else:
+        print("[graphify] LLM returned invalid JSON, skipping chunk", file=sys.stderr)
+    return {"nodes": [], "edges": [], "hyperedges": []}
+
+
+def _sanitize_extraction_result(result: dict) -> dict:
+    """Coerce parsed LLM output into the list-of-dicts shape used downstream."""
+    if not isinstance(result, dict):
         return {"nodes": [], "edges": [], "hyperedges": []}
 
+    sanitized = dict(result)
+    malformed = 0
+    for section in ("nodes", "edges", "hyperedges"):
+        items = sanitized.get(section)
+        if isinstance(items, list):
+            kept = [entry for entry in items if isinstance(entry, dict)]
+            malformed += len(items) - len(kept)
+            sanitized[section] = kept
+        else:
+            # A present-but-non-list value counts as one malformed item.
+            malformed += 0 if items is None else 1
+            sanitized[section] = []
+
+    if malformed:
+        print(
+            f"[graphify] dropped {malformed} malformed LLM graph item(s)",
+            file=sys.stderr,
+        )
+    return sanitized
+
 
 def _response_is_hollow(raw_content: str | None, parsed: dict) -> bool:
     """Detect a successful HTTP response that yielded no usable extraction.
@@ -232,6 +327,18 @@ def _get_backend_api_key(backend: str) -> str:
     return ""
 
 
+def _backend_is_configured(backend: str) -> bool:
+    """Report whether *backend* has the credentials or local runtime it needs."""
+    if backend == "bedrock":
+        return any(os.environ.get(var) for var in ("AWS_PROFILE", "AWS_REGION", "AWS_DEFAULT_REGION"))
+    if backend == "claude-cli":
+        return bool(shutil.which("claude"))
+    if backend != "ollama":
+        return bool(_get_backend_api_key(backend))
+    _validate_ollama_base_url(os.environ.get("OLLAMA_BASE_URL", BACKENDS[backend]["base_url"]))
+    return True
+
+
 def _format_backend_env_keys(backend: str) -> str:
     """Return user-facing accepted API-key variable names."""
     keys = _backend_env_keys(backend)
@@ -291,15 +398,22 @@ def _call_openai_compat(
             {"role": "system", "content": _EXTRACTION_SYSTEM},
             {"role": "user", "content": user_message},
         ],
-        "max_completion_tokens": max_completion_tokens,
     }
+    if "openrouter.ai" in base_url:
+        # OpenRouter's OpenAI-compatible surface is happiest with max_tokens;
+        # some upstream providers reject max_completion_tokens.
+        kwargs["max_tokens"] = max_completion_tokens
+        kwargs["response_format"] = {"type": "json_object"}
+        _merge_extra_body(kwargs, {"provider": {"require_parameters": True}})
+    else:
+        kwargs["max_completion_tokens"] = max_completion_tokens
     if temperature is not None:
         kwargs["temperature"] = temperature
     if reasoning_effort is not None:
         kwargs["reasoning_effort"] = reasoning_effort
     # Kimi-k2.6 is a reasoning model — disable thinking so content isn't empty
     if "moonshot" in base_url:
-        kwargs["extra_body"] = {"thinking": {"type": "disabled"}}
+        _merge_extra_body(kwargs, {"thinking": {"type": "disabled"}})
     # Ollama defaults num_ctx to 2048 and silently truncates prompts larger
     # than that — the symptom is hollow 200 OK responses after the first few
     # chunks (#798). We derive num_ctx from the actual prompt size so we don't
@@ -348,7 +462,7 @@ def _call_openai_compat(
     if not resp.choices or resp.choices[0].message is None:
         raise ValueError("LLM returned empty or filtered response")
     raw_content = resp.choices[0].message.content
-    result = _parse_llm_json(raw_content or "{}")
+    result = _sanitize_extraction_result(_parse_llm_json(raw_content or "{}"))
     result["input_tokens"] = resp.usage.prompt_tokens if resp.usage else 0
     result["output_tokens"] = resp.usage.completion_tokens if resp.usage else 0
     result["model"] = model
@@ -402,7 +516,7 @@ def _call_claude(api_key: str, model: str, user_message: str, max_tokens: int =
         messages=[{"role": "user", "content": user_message}],
     )
     raw_content = resp.content[0].text if resp.content else None
-    result = _parse_llm_json(raw_content or "{}")
+    result = _sanitize_extraction_result(_parse_llm_json(raw_content or "{}"))
     result["input_tokens"] = resp.usage.input_tokens if resp.usage else 0
     result["output_tokens"] = resp.usage.output_tokens if resp.usage else 0
     result["model"] = model
@@ -427,7 +541,6 @@ def _call_claude_cli(user_message: str, max_tokens: int = 8192) -> dict:
     ANTHROPIC_API_KEY. Useful for Pro/Max subscribers who don't want to provision
     a pay-as-you-go API key just to run graphify's semantic pass.
     """
-    import shutil
     import subprocess
 
     if shutil.which("claude") is None:
@@ -464,7 +577,7 @@ def _call_claude_cli(user_message: str, max_tokens: int = 8192) -> dict:
         ) from exc
 
     raw_content = envelope.get("result", "")
-    result = _parse_llm_json(raw_content or "{}")
+    result = _sanitize_extraction_result(_parse_llm_json(raw_content or "{}"))
     usage = envelope.get("usage") or {}
     result["input_tokens"] = (
         int(usage.get("input_tokens", 0) or 0)
@@ -514,7 +627,7 @@ def _call_bedrock(model: str, user_message: str, max_tokens: int = 8192) -> dict
         raise RuntimeError(f"Bedrock API error ({code}): {msg}") from exc
 
     text = resp.get("output", {}).get("message", {}).get("content", [{}])[0].get("text", "{}")
-    result = _parse_llm_json(text)
+    result = _sanitize_extraction_result(_parse_llm_json(text))
     usage = resp.get("usage", {})
     result["input_tokens"] = usage.get("inputTokens", 0)
     result["output_tokens"] = usage.get("outputTokens", 0)
@@ -985,7 +1098,7 @@ def _call_llm(prompt: str, *, backend: str, max_tokens: int = 200) -> str:
         return resp.content[0].text if resp.content else ""
 
     if backend == "claude-cli":
-        import shutil, subprocess
+        import subprocess
         if shutil.which("claude") is None:
             raise RuntimeError("Claude Code CLI not found on $PATH")
         proc = subprocess.run(
@@ -1021,7 +1134,7 @@ def _call_llm(prompt: str, *, backend: str, max_tokens: int = 200) -> str:
         )
         return resp.get("output", {}).get("message", {}).get("content", [{}])[0].get("text", "")
 
-    # OpenAI-compatible (kimi, openai, gemini, ollama)
+    # OpenAI-compatible (Kimi, OpenAI, Gemini, Ollama, OpenRouter)
     try:
         from openai import OpenAI
     except ImportError as exc:
@@ -1030,8 +1143,11 @@ def _call_llm(prompt: str, *, backend: str, max_tokens: int = 200) -> str:
     kwargs: dict = {
         "model": mdl,
         "messages": [{"role": "user", "content": prompt}],
-        "max_completion_tokens": max_tokens,
     }
+    if "openrouter.ai" in cfg["base_url"]:
+        kwargs["max_tokens"] = max_tokens
+    else:
+        kwargs["max_completion_tokens"] = max_tokens
     temperature = cfg.get("temperature", 0)
     if temperature is not None:
         kwargs["temperature"] = temperature
@@ -1091,7 +1207,9 @@ def _validate_ollama_base_url(url: str) -> None:
 def detect_backend() -> str | None:
     """Return the name of whichever backend has an API key set, or None.
 
-    Priority: gemini → kimi → claude → openai → bedrock → ollama (last, opt-in).
+    Priority: GRAPHIFY_DEFAULT_BACKEND if valid and configured, then
+    OpenRouter DeepSeek → OpenRouter Kimi → direct DeepSeek → Gemini → Kimi →
+    Claude → OpenAI → Bedrock → Ollama (last, opt-in).
 
     Ollama is intentionally checked LAST so a paid API key (Anthropic/OpenAI/etc.)
     is never silently shadowed by an incidental OLLAMA_BASE_URL in the environment
@@ -1099,7 +1217,11 @@ def detect_backend() -> str | None:
     key now keeps you on the paid backend; remove the paid key (or pass
     --backend ollama explicitly) to route to the local model.
     """
-    for backend in ("gemini", "kimi", "claude", "openai", "deepseek"):
+    explicit = os.environ.get("GRAPHIFY_DEFAULT_BACKEND", "").strip()
+    if explicit in BACKENDS and _backend_is_configured(explicit):
+        return explicit
+
+    for backend in ("openrouter-deepseek", "openrouter-kimi", "deepseek", "gemini", "kimi", "claude", "openai"):
         if _get_backend_api_key(backend):
             return backend
     if os.environ.get("AWS_PROFILE") or os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION"):
diff --git a/graphify/prs.py b/graphify/prs.py
index cd0bc0e74..544eccf36 100644
--- a/graphify/prs.py
+++ b/graphify/prs.py
@@ -541,8 +541,11 @@ def render_pr_detail(pr: PRInfo, repo: str | None = None) -> None:
 
 # Best model per backend for reasoning tasks (different from extraction defaults)
 _TRIAGE_MODEL_DEFAULTS: dict[str, str] = {
+    "openrouter-deepseek": "deepseek/deepseek-v4-flash",  # vendor-prefixed model id
+    "deepseek": "deepseek-v4-flash",
     "claude": "claude-opus-4-7",
     "kimi":   "kimi-k2.6",
+    "openrouter-kimi": "moonshotai/kimi-k2.6",  # vendor-prefixed model id
     "openai": "gpt-4.1-mini",
     "gemini": "gemini-3-flash-preview",
 }
@@ -559,7 +562,7 @@ def _resolve_triage_backend() -> tuple[str, str]:
                  or _default_model_for_backend(explicit))
         return explicit, model
 
-    for b in ("claude", "kimi", "openai", "gemini"):
+    for b in ("openrouter-deepseek", "openrouter-kimi", "deepseek", "claude", "kimi", "openai", "gemini"):
         if _get_backend_api_key(b):
             model = (os.environ.get("GRAPHIFY_TRIAGE_MODEL")
                      or _TRIAGE_MODEL_DEFAULTS.get(b)
@@ -570,7 +573,7 @@ def _resolve_triage_backend() -> tuple[str, str]:
     if shutil.which("claude"):
         return "claude-cli", "claude-code-plan"
 
-    return "ollama", _default_model_for_backend("ollama")
+    raise RuntimeError("No triage backend configured. Set OPENROUTER_API_KEY (preferred) or GRAPHIFY_TRIAGE_BACKEND; refusing to fall back to a local Ollama model by default.")
 
 
 def triage_with_opus(prs: list[PRInfo], base: str) -> None:
@@ -624,7 +627,7 @@ def triage_with_opus(prs: list[PRInfo], base: str) -> None:
                     print(text.replace("\n", "\n  "), end="", flush=True)
             print("\n")
 
-        elif backend in ("kimi", "openai", "gemini", "ollama"):
+        elif backend in ("openrouter-deepseek", "deepseek", "kimi", "openrouter-kimi", "openai", "gemini", "ollama"):
             from openai import OpenAI
             cfg = BACKENDS[backend]
             api_key = _get_backend_api_key(backend) or "ollama"
diff --git a/graphify/quality.py b/graphify/quality.py
new file mode 100644
index 000000000..9f153da93
--- /dev/null
+++ b/graphify/quality.py
@@ -0,0 +1,109 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+
+def _edge_list(data: dict[str, Any]) -> list[Any]:
+    """Return the graph's edge array, or an empty list if it is not a list.
+
+    The ``links`` key wins over ``edges`` whenever it is present, even when
+    its value is malformed.
+    """
+    value = data.get("links") if "links" in data else data.get("edges")
+    return value if isinstance(value, list) else []
+
+
+def _node_list(data: dict[str, Any]) -> list[Any]:
+    """Return the ``nodes`` array, or an empty list if it is not a list."""
+    value = data.get("nodes", [])
+    return value if isinstance(value, list) else []
+
+
+def _is_usable_id(value: Any) -> bool:
+    """Return True when *value* is truthy and can be stored in a set.
+
+    JSON arrays and objects decode to ``list`` and ``dict``, which are
+    unhashable and would raise ``TypeError`` on set construction or lookup.
+    """
+    return bool(value) and not isinstance(value, (list, dict))
+
+
+def inspect_graph(path: str | Path) -> dict[str, Any]:
+    """Return schema-quality counters for a graphify graph.json file.
+
+    Args:
+        path: Location of the JSON graph file; its top-level value is
+            expected to be an object.
+
+    Returns:
+        A report with ``path``, ``nodes`` and ``edges`` (counts of
+        object-typed entries), ``issues`` (counter name to count),
+        ``total_issues`` and ``status`` (``"pass"`` only when every
+        counter is zero).
+
+    Raises:
+        OSError: If the file cannot be read.
+        json.JSONDecodeError: If the file does not contain valid JSON.
+    """
+    graph_path = Path(path)
+    data = json.loads(graph_path.read_text(encoding="utf-8"))
+    raw_nodes = data.get("nodes", [])
+    raw_edges = data.get("links") if "links" in data else data.get("edges", [])
+    nodes = _node_list(data)
+    edges = _edge_list(data)
+    node_dicts = [n for n in nodes if isinstance(n, dict)]
+    edge_dicts = [e for e in edges if isinstance(e, dict)]
+    # Array/object ids cannot be hashed; they count as missing ids instead
+    # of crashing the inspection.
+    node_ids = [n["id"] for n in node_dicts if _is_usable_id(n.get("id"))]
+    node_id_set = set(node_ids)
+
+    dangling_edges = 0
+    for edge in edge_dicts:
+        for endpoint in (edge.get("source"), edge.get("target")):
+            # Falsy endpoints are counted as missing below, not as dangling.
+            if endpoint and not (_is_usable_id(endpoint) and endpoint in node_id_set):
+                dangling_edges += 1
+
+    issues = {
+        "non_object_nodes": len(nodes) - len(node_dicts) if isinstance(raw_nodes, list) else 1,
+        "non_object_edges": len(edges) - len(edge_dicts) if isinstance(raw_edges, list) else 1,
+        "missing_node_ids": sum(1 for n in node_dicts if not _is_usable_id(n.get("id"))),
+        "missing_node_labels": sum(1 for n in node_dicts if not n.get("label")),
+        "missing_node_source_files": sum(1 for n in node_dicts if not n.get("source_file")),
+        "missing_edge_sources": sum(1 for e in edge_dicts if not e.get("source")),
+        "missing_edge_targets": sum(1 for e in edge_dicts if not e.get("target")),
+        "missing_edge_relations": sum(1 for e in edge_dicts if not e.get("relation")),
+        "missing_edge_confidences": sum(1 for e in edge_dicts if not e.get("confidence")),
+        "missing_edge_source_files": sum(1 for e in edge_dicts if not e.get("source_file")),
+        "typo_confience_score_edges": sum(1 for e in edge_dicts if "confience_score" in e),
+        "duplicate_node_ids": len(node_ids) - len(node_id_set),
+        "dangling_edge_endpoints": dangling_edges,
+    }
+    total_issues = sum(issues.values())
+    return {
+        "path": str(graph_path),
+        "nodes": len(node_dicts),
+        "edges": len(edge_dicts),
+        "issues": issues,
+        "total_issues": total_issues,
+        "status": "pass" if total_issues == 0 else "fail",
+    }
+
+
+def format_report(report: dict[str, Any]) -> str:
+    """Return a concise human-readable graph quality report.
+
+    Only counters with a non-zero value get their own line.
+    """
+    lines = [
+        f"Graph quality: {report['status']}",
+        f"  path: {report['path']}",
+        f"  nodes: {report['nodes']}",
+        f"  edges: {report['edges']}",
+        f"  total issues: {report['total_issues']}",
+    ]
+    lines.extend(f"  {key}: {value}" for key, value in report["issues"].items() if value)
+    return "\n".join(lines)
diff --git a/graphify/skill.md b/graphify/skill.md
index c3e39b3f4..546a8cd6d 100644
--- a/graphify/skill.md
+++ b/graphify/skill.md
@@ -88,7 +88,7 @@ The skill pipeline writes all intermediate and final outputs to `graphify-out/`
 graphify extract ./core/     # → ./core/graphify-out/graph.json
 graphify extract ./service/  # → ./service/graphify-out/graph.json
 graphify extract ./platform/ # → ./platform/graphify-out/graph.json
-# Add --backend gemini|kimi|openai|deepseek|claude-cli depending on which API key you have set
+# Add --backend openrouter-deepseek|openrouter-kimi|deepseek|gemini|kimi|openai|claude-cli depending on which API key you have set
 
 # Then merge at the project root:
 graphify merge-graphs \
diff --git a/pyproject.toml b/pyproject.toml
index cfdcd961d..f534be632 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,6 +62,7 @@ ollama = ["openai"]
 bedrock = ["boto3"]
 gemini = ["openai", "tiktoken"]
 openai = ["openai", "tiktoken"]
+openrouter = ["openai", "tiktoken"]
 sql = ["tree-sitter-sql"]
 all = ["mcp", "neo4j", "pypdf", "markdownify", "watchdog", "graspologic; python_version < '3.13'", "python-docx", "openpyxl", "faster-whisper", "yt-dlp", "matplotlib", "openai", "tiktoken", "boto3", "tree-sitter-sql"]
 
diff --git a/tests/test_build.py b/tests/test_build.py
index 85d59fd5e..9b74f2979 100644
--- a/tests/test_build.py
+++ b/tests/test_build.py
@@ -52,6 +52,34 @@ def test_legacy_edge_from_to_canonicalized():
     assert G.number_of_edges() == 1
 
 
+def test_malformed_llm_fields_are_canonicalized():
+    ext = {
+        "nodes": [
+            {"id": "memory_control_plane", "file_type": "document", "source_file": "a.md"},  # no "label" key
+            {"id": "dex_system", "label": "Dex System", "file_type": "document", "source_file": "b.md"},
+        ],
+        "edges": [
+            {  # no "relation" key
+                "source": "memory_control_plane",
+                "target": "dex_system",
+                "confidence": "INFERRED",
+                "confidence_score": 0.9,
+                "confience_score": 0.8,  # deliberately misspelled key
+                "source_file": "",  # empty on purpose
+            }
+        ],
+        "input_tokens": 0,
+        "output_tokens": 0,
+    }
+    G = build_from_json(ext)
+    assert G.nodes["memory_control_plane"]["label"] == "Memory Control Plane"  # expected: label built from the id
+    edge = G.edges["memory_control_plane", "dex_system"]
+    assert edge["relation"] == "conceptually_related_to"  # expected default relation
+    assert edge["source_file"] == "unknown"  # expected placeholder for the empty value
+    assert edge["confidence_score"] == 0.9  # correctly spelled value is kept
+    assert "confience_score" not in edge
+
+
 def test_source_file_backslash_normalized():
     """Windows backslash paths and POSIX paths for the same file must produce one node."""
     extraction = {
diff --git a/tests/test_export.py b/tests/test_export.py
index 832c87073..df00f4ec9 100644
--- a/tests/test_export.py
+++ b/tests/test_export.py
@@ -38,6 +38,39 @@ def test_to_json_nodes_have_community():
         for node in data["nodes"]:
             assert "community" in node
 
+
+def test_to_json_canonicalizes_salvageable_schema_fields():
+    G = build_from_json({
+        "nodes": [
+            {"id": "memory_control_plane", "file_type": "document", "source_file": "a.md"},  # no "label" key
+            {"id": "dex_system", "label": "Dex System", "file_type": "document", "source_file": "b.md"},
+        ],
+        "edges": [
+            {  # no "relation" key
+                "source": "memory_control_plane",
+                "target": "dex_system",
+                "confidence": "INFERRED",
+                "confidence_score": 0.9,
+                "confience_score": 0.8,  # deliberately misspelled key
+                "source_file": "",  # empty on purpose
+            }
+        ],
+        "input_tokens": 0,
+        "output_tokens": 0,
+    })
+    with tempfile.TemporaryDirectory() as tmp:
+        out = Path(tmp) / "graph.json"
+        to_json(G, {0: list(G.nodes)}, str(out), force=True)  # all nodes placed in community 0
+        data = json.loads(out.read_text())
+        node = next(n for n in data["nodes"] if n["id"] == "memory_control_plane")
+        link = data["links"][0]  # only one edge was supplied
+        assert node["label"] == "Memory Control Plane"  # expected: label built from the id
+        assert link["relation"] == "conceptually_related_to"  # expected default relation
+        assert link["source_file"] == "unknown"  # expected placeholder for the empty value
+        assert link["confidence_score"] == 0.9  # correctly spelled value is kept
+        assert "confience_score" not in link
+
+
 def test_to_cypher_creates_file():
     G = make_graph()
     with tempfile.TemporaryDirectory() as tmp:
diff --git a/tests/test_quality.py b/tests/test_quality.py
new file mode 100644
index 000000000..4042875ea
--- /dev/null
+++ b/tests/test_quality.py
@@ -0,0 +1,58 @@
+import json
+
+from graphify.quality import format_report, inspect_graph
+
+
+def test_quality_passes_clean_graph(tmp_path):
+    graph = {
+        "nodes": [{"id": "a", "label": "A", "source_file": "a.md"}],  # id, label and source_file all present
+        "links": [],  # no edges at all
+    }
+    path = tmp_path / "graph.json"
+    path.write_text(json.dumps(graph), encoding="utf-8")
+    report = inspect_graph(path)
+    assert report["status"] == "pass"
+    assert report["total_issues"] == 0
+
+
+def test_quality_reports_schema_defects(tmp_path):
+    graph = {
+        "nodes": [
+            {"id": "a", "source_file": ""},  # no label, empty source_file
+            "bad",  # not an object
+            {"id": "a", "label": "Duplicate", "source_file": "a.md"},  # repeats id "a"
+        ],
+        "links": [
+            {"source": "a", "target": "missing", "confidence": "INFERRED", "confience_score": 0.8},  # target matches no node; no relation/source_file; misspelled key
+            "bad",  # not an object
+        ],
+    }
+    path = tmp_path / "graph.json"
+    path.write_text(json.dumps(graph), encoding="utf-8")
+    report = inspect_graph(path)
+    assert report["status"] == "fail"
+    assert report["issues"]["non_object_nodes"] == 1
+    assert report["issues"]["non_object_edges"] == 1
+    assert report["issues"]["missing_node_labels"] == 1
+    assert report["issues"]["missing_node_source_files"] == 1
+    assert report["issues"]["missing_edge_relations"] == 1
+    assert report["issues"]["missing_edge_source_files"] == 1
+    assert report["issues"]["typo_confience_score_edges"] == 1
+    assert report["issues"]["duplicate_node_ids"] == 1
+    assert report["issues"]["dangling_edge_endpoints"] == 1  # only the target is unknown; source "a" exists
+    assert "Graph quality: fail" in format_report(report)
+
+
+def test_quality_reports_non_list_graph_fields(tmp_path):
+    graph = {
+        "nodes": {"id": "not_a_list"},  # object where an array is expected
+        "links": {"source": "not_a_list"},  # object where an array is expected
+    }
+    path = tmp_path / "graph.json"
+    path.write_text(json.dumps(graph), encoding="utf-8")
+    report = inspect_graph(path)
+    assert report["status"] == "fail"
+    assert report["nodes"] == 0
+    assert report["edges"] == 0
+    assert report["issues"]["non_object_nodes"] == 1  # a non-list container is reported as one defect
+    assert report["issues"]["non_object_edges"] == 1