kalibr-ai · devonakelley · May 22, 2026 · May 22, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.14.4] - 2026-05-22
+
+### Fixed
+
+- **Heal loop returns best attempt instead of raising on exhaustion** — When all heal-loop paths are exhausted, the router now returns the last attempted response (with `kalibr_heal_exhausted = True`) instead of raising `RuntimeError`. It raises only if no response was ever received (e.g. network failure before any bytes arrived). Prevents benchmarks and callers from counting partial results as hard errors.
+
 ### Added
 
 - **Tavily Search provider** — `tavily/basic` and `tavily/advanced` as Router paths. Returns web search results wrapped in an OpenAI-compatible ChatCompletion shim so Thompson Sampling can compete Tavily against LLMs on web research goals. Set `TAVILY_API_KEY` env var.

diff --git a/kalibr/router.py b/kalibr/router.py
@@ -925,7 +925,22 @@ def _heal_dispatch(m_id: str, msgs: List[Dict], system_prompt: Optional[str] = N
                 router_span.set_attribute("kalibr.failure_category", failure_category)
 
                 err_msg = heal_result.get("error") or f"heal loop failed: {failure_category}"
-                raise RuntimeError(f"Heal loop exhausted all paths: {err_msg}")
+
+                # Return the best attempt instead of raising — a partial response is
+                # better than an exception for benchmarks and callers that can tolerate
+                # lower-quality output. Raise only if no response was ever received
+                # (e.g. network failure before any bytes arrived).
+                best_response = heal_result.get("response")
+                if best_response is None:
+                    raise RuntimeError(f"Heal loop exhausted all paths: {err_msg}")
+
+                best_response.kalibr_trace_id = trace_id
+                best_response.kalibr_healed = True
+                best_response.kalibr_heal_exhausted = True
+                best_response.kalibr_heal_count = heal_result.get("heal_count", 0)
+                best_response.kalibr_models_tried = heal_result.get("models_tried") or []
+                best_response.kalibr_model_used = used_model
+                return best_response
 
             # Step 5: Build ordered candidate paths for fallback
             # First: intelligence-selected path, then remaining registered paths

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "kalibr"
-version = "1.14.3"
+version = "1.14.4"
 description = "Outcome-aware LLM routing for production AI agents. Routes between models, tools, and parameters based on real success signals using Thompson Sampling. Automatic fallback, cost optimization, and continuous learning — no redeploy required."
 authors = [{name = "Kalibr Team", email = "support@kalibr.systems"}]
 readme = "README.md"