From 5ec79538c1e4c2f7882dceb74f8f3bfbaa417e1a Mon Sep 17 00:00:00 2001
From: AXIS Contributor <axis-dev@example.invalid>
Date: Tue, 2 Jun 2026 06:25:36 -0400
Subject: [PATCH 1/2] Add Ollama warmth lifetime scoring as bounded placement
 tiebreaker

Promote the resident-model 'is loaded' boolean into a continuous 0.0-1.0
warmth score derived from Ollama's /api/ps expires_at and
default_keep_alive. Warmth becomes a bounded tiebreaker at position 10 of
the rank comparator, after RAM, GPU, and pressure but before reservation ratio.

FilterCandidates is unchanged: warmth is consulted only among nodes that
already passed eval.Eligible(). It cannot promote an undersized node, and
the three-bucket discretization (cold/warm/hot at 0.5 and 0.9) keeps
ranking stable.

Probe layer reads the new fields when Ollama 0.3.10+ is present
('ollama ps -qq' JSON path), and degrades gracefully to the existing awk
parser on older Ollama - no expires_at is emitted in that case and
WarmthScore remains 0 (cold). /api/ps is also queried for
default_keep_alive, falling back to 5m when missing or unparseable.

Adds ResidentModel.ExpiresAt, ResidentModel.WarmthScore, and
OllamaInfo.DefaultKeepAlive (all omitempty, additive JSON), plus
ApplyOllamaWarmth / DefaultOllamaKeepAlive helpers in the facts layer and
modelWarmthRank in the ranker. Tests cover: warmth loses to allocatable
RAM, warmth breaks ties on equal RAM, warmth is ignored when FilterCandidates
rejects, boundary cases (0, 0.5, 0.51, 0.9, 0.91, 1.0), highest-relevant
wins, other-runtime warmth is ignored, and time math for zero / future /
past ExpiresAt.
---
 internal/facts/local.go           |  62 ++++++++
 internal/facts/tools.go           |  46 +++++-
 internal/models/types.go          |  26 +++-
 internal/placement/empirical.go   |  36 +++++
 internal/placement/ranker.go      |   6 +
 internal/placement/warmth_test.go | 231 ++++++++++++++++++++++++++++++
 6 files changed, 404 insertions(+), 3 deletions(-)
 create mode 100644 internal/placement/warmth_test.go

diff --git a/internal/facts/local.go b/internal/facts/local.go
index 525c8ce..0c9e195 100644
--- a/internal/facts/local.go
+++ b/internal/facts/local.go
@@ -995,11 +995,73 @@ func discoverOllamaLocal(ctx context.Context) (models.OllamaInfo, []models.Resid
 	// parse the JSON blob
 	var parsed ollamaDiscoveryPayload
 	if json.Unmarshal(out, &parsed) == nil {
+		ApplyOllamaWarmth(&parsed.OllamaInfo, parsed.ResidentModels)
 		return parsed.OllamaInfo, parsed.ResidentModels
 	}
 	return info, nil
 }
 
+// ApplyOllamaWarmth derives WarmthScore for each ResidentModel from its
+// ExpiresAt, which the Ollama probe fills from the per-model `expires_at`
+// field, and from the process-level `default_keep_alive` duration
+// (Ollama 0.3.10+). Warmth is a continuous score in [0, 1] computed as
+// remaining / total, where total falls back to 5m (Ollama's stock default)
+// when `default_keep_alive` is absent or unparseable. A zero ExpiresAt
+// leaves WarmthScore untouched; a past one sets it to 0 (cold). The score
+// is advisory metadata only — placement consumes it as a bounded
+// tiebreaker via modelWarmthRank in internal/placement.
+//
+// Exported for testability from internal/placement and from
+// internal/facts tests.
+func ApplyOllamaWarmth(info *models.OllamaInfo, rms []models.ResidentModel) {
+	if len(rms) == 0 {
+		return
+	}
+	now := time.Now()
+	total := DefaultOllamaKeepAlive(info)
+	for i := range rms {
+		rm := &rms[i]
+		if rm.ExpiresAt.IsZero() {
+			continue
+		}
+		if !rm.ExpiresAt.After(now) {
+			rm.WarmthScore = 0
+			continue
+		}
+		remaining := rm.ExpiresAt.Sub(now)
+		score := float64(remaining) / float64(total)
+		if score < 0 {
+			score = 0
+		}
+		if score > 1 {
+			score = 1
+		}
+		rm.WarmthScore = score
+	}
+}
+
+// DefaultOllamaKeepAlive resolves the process-level default_keep_alive
+// duration from an Ollama /api/ps payload, falling back to 5m (Ollama's
+// stock default since 0.3.10) when the field is absent or unparseable.
+// Returns a positive duration on success. Exported for testability.
+func DefaultOllamaKeepAlive(info *models.OllamaInfo) time.Duration {
+	const fallback = 5 * time.Minute
+	if info == nil {
+		return fallback
+	}
+	if info.DefaultKeepAlive == "" {
+		return fallback
+	}
+	// `time.ParseDuration` accepts "5m", "1h30m", "-30s", etc. It does
+	// not accept a bare integer (seconds) — Ollama emits e.g. "5m" so
+	// this is fine. Negative durations are clamped to fallback.
+	d, err := time.ParseDuration(info.DefaultKeepAlive)
+	if err != nil || d <= 0 {
+		return fallback
+	}
+	return d
+}
+
 // discoverLlamaServerLocal probes for a running llama-server process and
 // returns its resident models. Returns nil if llama-server is not installed or
 // not running.
diff --git a/internal/facts/tools.go b/internal/facts/tools.go
index 2a53426..c60803d 100644
--- a/internal/facts/tools.go
+++ b/internal/facts/tools.go
@@ -40,13 +40,55 @@ const OllamaDiscoveryScript = `set -o pipefail;
 			LISTENING=true
 		fi
 		GPU=$($OLLAMA_BIN ps 2>/dev/null | grep -o 'gpu:[^ ]*' | head -1)
-		RESIDENT=$($OLLAMA_BIN ps 2>/dev/null | awk 'NR>1 && NF { proc=""; size_mb=0; for(i=1;i<=NF;i++){if($i~/[0-9]+%/){proc=$i" "$(i+1)} if(($i=="GB"||$i=="GiB")&&i>1&&($(i-1)+0)>0){size_mb=int($(i-1)*1024+0.5)} if(($i=="MB"||$i=="MiB")&&i>1&&($(i-1)+0)>0){size_mb=int($(i-1)+0.5)}} gsub(/"/, "\\\"", proc); printf "%s{\"name\":\"%s\",\"runtime\":\"ollama\",\"processor\":\"%s\",\"size_vram_mb\":%d,\"source\":\"ollama-ps\"}", (n++ ? "," : ""), $1, proc, size_mb }')
+		# 'ollama ps -qq' (added in Ollama 0.3.10) emits JSON: each entry
+		# includes name, expires_at (RFC3339) and size_vram. Parse it
+		# with python3 (always present on nodes with ollama) and emit
+		# one JSON object per model. Falls back to the existing awk
+		# parser when the JSON is unavailable (older Ollama).
+		PS_JSON=$($OLLAMA_BIN ps -qq 2>/dev/null || echo "")
+		if [ -n "$PS_JSON" ]; then
+			RESIDENT=$(printf '%s' "$PS_JSON" | python3 - 2>/dev/null <<'PYEOF' || echo ""
+import json, sys
+try:
+    entries = json.loads(sys.stdin.read() or "")
+except Exception:
+    sys.exit(0)
+out = []
+for e in entries:
+    name = e.get("name", "")
+    if not name:
+        continue
+    out.append(json.dumps({
+        "name": name,
+        "runtime": "ollama",
+        "processor": e.get("processor", "gpu"),
+        "size_vram_mb": int((e.get("size_vram") or 0) // (1024*1024)),
+        "source": "ollama-ps",
+        "expires_at": e.get("expires_at", ""),
+    }))
+print(",".join(out))
+PYEOF
+)
+		else
+			RESIDENT=""
+		fi
+		if [ -z "$RESIDENT" ]; then
+			# Fallback: original awk parser (older Ollama, no 'ps -qq').
+			# No expires_at field is emitted by this path; the local
+			# parser will leave ExpiresAt zero and WarmthScore at 0.
+			RESIDENT=$($OLLAMA_BIN ps 2>/dev/null | awk 'NR>1 && NF { proc=""; size_mb=0; for(i=1;i<=NF;i++){if($i~/[0-9]+%/){proc=$i" "$(i+1)} if(($i=="GB"||$i=="GiB")&&i>1&&($(i-1)+0)>0){size_mb=int($(i-1)*1024+0.5)} if(($i=="MB"||$i=="MiB")&&i>1&&($(i-1)+0)>0){size_mb=int($(i-1)+0.5)}} gsub(/"/, "\\\"", proc); printf "%s{\"name\":\"%s\",\"runtime\":\"ollama\",\"processor\":\"%s\",\"size_vram_mb\":%d,\"source\":\"ollama-ps\"}", (n++ ? "," : ""), $1, proc, size_mb }')
+		fi
 		if [ -n "$RESIDENT" ]; then
 			RESIDENT="[$RESIDENT]"
 		else
 			RESIDENT="[]"
 		fi
-		echo "{\"installed\":true,\"path\":\"$OLLAMA_BIN\",\"version\":\"${VERSION:-unknown}\",\"running\":$( [ -n \"$PGREP\" ] && echo true || echo false ),\"listening\":$LISTENING,\"port\":11434,\"models\":$MODELS,\"resident_models\":$RESIDENT,\"gpu_offload\":\"${GPU:-none}\"}"
+		# Process-level default_keep_alive (added in Ollama 0.3.10). Read
+		# from /api/ps; tolerate older Ollama (or versions that omit
+		# the field) by emitting an empty string. Treat null and any
+		# failure to parse as empty.
+		KEEPALIVE=$(curl -s --max-time 2 http://127.0.0.1:11434/api/ps 2>/dev/null | python3 -c "import sys,json; d=json.load(sys.stdin); v=d.get('default_keep_alive'); print('' if v is None else v)" 2>/dev/null || echo "")
+		echo "{\"installed\":true,\"path\":\"$OLLAMA_BIN\",\"version\":\"${VERSION:-unknown}\",\"running\":$( [ -n \"$PGREP\" ] && echo true || echo false ),\"listening\":$LISTENING,\"port\":11434,\"models\":$MODELS,\"resident_models\":$RESIDENT,\"gpu_offload\":\"${GPU:-none}\",\"default_keep_alive\":\"${KEEPALIVE}\"}"
 	`
 
 // LlamaServerDiscoveryScript is the bash script used to detect a running
diff --git a/internal/models/types.go b/internal/models/types.go
index 2d830c1..d42d41d 100644
--- a/internal/models/types.go
+++ b/internal/models/types.go
@@ -171,17 +171,41 @@ type OllamaInfo struct {
 	Port       int      `json:"port,omitempty" yaml:"port,omitempty"`
 	Models     []string `json:"models,omitempty" yaml:"models,omitempty"`
 	GPUOffload string   `json:"gpu_offload,omitempty" yaml:"gpu_offload,omitempty"`
-	Error      string   `json:"error,omitempty" yaml:"error,omitempty"`
+	// DefaultKeepAlive is the process-level Ollama default keep-alive
+	// duration string (e.g. "5m", "1h"). Populated from /api/ps on
+	// Ollama 0.3.10+; empty when unknown or on older Ollama. The warmth
+	// computation in internal/facts/local.go (ApplyOllamaWarmth) parses
+	// this and falls back to 5m when empty or unparseable.
+	DefaultKeepAlive string `json:"default_keep_alive,omitempty" yaml:"default_keep_alive,omitempty"`
+	Error            string `json:"error,omitempty" yaml:"error,omitempty"`
 }
 
 // ResidentModel is additive truth-plane metadata describing a model that is
 // currently resident in a node runtime according to a live probe.
+//
+// ExpiresAt and WarmthScore are populated from Ollama's /api/ps
+// `expires_at` and `default_keep_alive` fields (Ollama 0.3.10+). They are
+// optional: when absent (older Ollama, no `keep_alive`, or other runtimes
+// such as llama-server / mlx_lm.server), both fields remain zero and
+// WarmthScore is treated as cold. The fields are advisory metadata only;
+// placement uses them as a bounded tiebreaker, never as a primary signal
+// (see modelWarmthRank in internal/placement).
 type ResidentModel struct {
 	Name       string `json:"name" yaml:"name"`
 	Runtime    string `json:"runtime,omitempty" yaml:"runtime,omitempty"`
 	Processor  string `json:"processor,omitempty" yaml:"processor,omitempty"`
 	Source     string `json:"source,omitempty" yaml:"source,omitempty"`
 	SizeVRAMMB int64  `json:"size_vram_mb,omitempty" yaml:"size_vram_mb,omitempty"` // 0 = unknown/not reported by the runtime; currently populated only by the Ollama probe
+
+	// ExpiresAt is when the runtime is expected to unload the model; zero when unknown.
+	// omitzero (Go 1.24+) drops the zero value; omitempty never omits a time.Time struct.
+	ExpiresAt time.Time `json:"expires_at,omitzero" yaml:"expires_at,omitempty"`
+
+	// WarmthScore is the fraction of the keep-alive window still remaining,
+	// derived from ExpiresAt and the runtime's default_keep_alive.
+	// 1.0 = full window remaining, 0.0 = expired or unknown.
+	// Always non-negative; clamped to [0, 1] at compute time.
+	WarmthScore float64 `json:"warmth_score,omitempty" yaml:"warmth_score,omitempty"`
 }
 
 // TurboQuantInfo records whether a node appears able to run a TurboQuant-like
diff --git a/internal/placement/empirical.go b/internal/placement/empirical.go
index 3b67036..19c5913 100644
--- a/internal/placement/empirical.go
+++ b/internal/placement/empirical.go
@@ -206,10 +206,46 @@ func relevantResidentModels(n models.NodeFacts, reqs models.TaskRequirements) []
 	return relevant
 }
 
+// residentModelRank returns a higher score for nodes with a relevant
+// resident model already loaded. Currently a count; reserved for
+// future qualitative scoring (warmth is layered on as a separate
+// modelWarmthRank tiebreaker in ranker.go, not folded into this).
 func residentModelRank(n models.NodeFacts, reqs models.TaskRequirements) int {
 	return len(relevantResidentModels(n, reqs))
 }
 
+// modelWarmthRank returns a bounded rank ∈ {0, 1, 2} for the warmest
+// relevant resident model on a node: 0 = cold (score ≤ 0.5, including
+// unknown or expired), 1 = warm (>0.5), 2 = hot (>0.9). When no relevant
+// model is loaded, returns 0. Used as a tiebreaker at position 10 in
+// RankCandidates — never a primary signal.
+func modelWarmthRank(n models.NodeFacts, reqs models.TaskRequirements) int {
+	relevant := relevantResidentModels(n, reqs)
+	if len(relevant) == 0 {
+		return 0
+	}
+	best := 0.0
+	for _, m := range relevant {
+		if m.WarmthScore > best {
+			best = m.WarmthScore
+		}
+	}
+	return warmthToRank(best)
+}
+
+// warmthToRank maps a continuous [0, 1] score to a 0/1/2 rank.
+// Boundaries: 0 = cold, 1 = warm (>0.5), 2 = hot (>0.9).
+func warmthToRank(score float64) int {
+	switch {
+	case score > 0.9:
+		return 2
+	case score > 0.5:
+		return 1
+	default:
+		return 0
+	}
+}
+
 func residentModelReason(n models.NodeFacts, reqs models.TaskRequirements) string {
 	modelsForReq := relevantResidentModels(n, reqs)
 	if len(modelsForReq) == 0 {
diff --git a/internal/placement/ranker.go b/internal/placement/ranker.go
index 25b24b1..a069416 100644
--- a/internal/placement/ranker.go
+++ b/internal/placement/ranker.go
@@ -81,6 +81,7 @@ func RankCandidates(candidates []models.NodeFacts, reqs models.TaskRequirements,
 		turboQuantRank          int
 		unifiedMemoryRank       int
 		pressureRank            int
+		modelWarmthRank         int
 		reservationRatio        float64
 		clusterReservationShare float64
 	}
@@ -132,6 +133,7 @@ func RankCandidates(candidates []models.NodeFacts, reqs models.TaskRequirements,
 			turboQuantRank:          turboQuantRank(n),
 			unifiedMemoryRank:       unifiedMemoryRank(n, reqs),
 			pressureRank:            pressureRank(pressureOf(n)),
+			modelWarmthRank:         modelWarmthRank(n, reqs),
 			reservationRatio:        reservationRatio(n),
 			clusterReservationShare: share,
 		}
@@ -176,6 +178,10 @@ func RankCandidates(candidates []models.NodeFacts, reqs models.TaskRequirements,
 			return keys[i].pressureRank < keys[j].pressureRank
 		}
 
+		if keys[i].modelWarmthRank != keys[j].modelWarmthRank {
+			return keys[i].modelWarmthRank > keys[j].modelWarmthRank
+		}
+
 		if keys[i].reservationRatio != keys[j].reservationRatio {
 			return keys[i].reservationRatio < keys[j].reservationRatio
 		}
diff --git a/internal/placement/warmth_test.go b/internal/placement/warmth_test.go
new file mode 100644
index 0000000..079ccdd
--- /dev/null
+++ b/internal/placement/warmth_test.go
@@ -0,0 +1,231 @@
+package placement
+
+import (
+	"testing"
+	"time"
+
+	"github.com/toasterbook88/axis/internal/facts"
+	"github.com/toasterbook88/axis/internal/models"
+)
+
+// TestRankCandidatesWarmthLosesToAllocatableRAM verifies the v2 critical-fix
+// invariant: warmth is a bounded tiebreaker, never a primary signal. A small
+// node with a hot model must not outrank a large node with a cold model.
+func TestRankCandidatesWarmthLosesToAllocatableRAM(t *testing.T) {
+	// 4GB total, 2GB free → only just passes a 1GB requirement.
+	// Hot ollama model loaded (warmth=1.0).
+	hot := nodeComplete("hot-small", 2000, "none", "ollama")
+	hot.Ollama = &models.OllamaInfo{Installed: true, Running: true}
+	hot.ResidentModels = []models.ResidentModel{
+		{Name: "llama3:8b", Runtime: "ollama", Source: "ollama-ps", WarmthScore: 1.0},
+	}
+
+	// 16GB total, 14GB free, no resident model.
+	cold := nodeComplete("cold-large", 14000, "none", "ollama")
+	cold.Ollama = &models.OllamaInfo{Installed: true, Running: true}
+
+	reqs := models.TaskRequirements{
+		RequiredTools: []string{"ollama"},
+		MinFreeRAMMB:  1024,
+		Workload:      models.WorkloadProfileMatch{Class: models.ClassLocalLLMInference},
+	}
+
+	ranked := RankCandidates([]models.NodeFacts{hot, cold}, reqs, nil)
+	if ranked[0].Name != "cold-large" {
+		t.Fatalf("expected cold-large to win on allocatable RAM regardless of warmth, got %s", ranked[0].Name)
+	}
+}
+
+// TestRankCandidatesWarmthBreaksTieOnEqualAllocatableRAM verifies the v2
+// bounded-tiebreaker behavior: two equally-RAM-eligible nodes differ only on
+// warmth, and the warmer node wins.
+func TestRankCandidatesWarmthBreaksTieOnEqualAllocatableRAM(t *testing.T) {
+	alpha := nodeComplete("alpha", 8000, "none", "ollama")
+	alpha.Ollama = &models.OllamaInfo{Installed: true, Running: true}
+	alpha.ResidentModels = []models.ResidentModel{
+		{Name: "llama3:8b", Runtime: "ollama", Source: "ollama-ps", WarmthScore: 0.0},
+	}
+
+	beta := nodeComplete("beta", 8000, "none", "ollama")
+	beta.Ollama = &models.OllamaInfo{Installed: true, Running: true}
+	beta.ResidentModels = []models.ResidentModel{
+		{Name: "llama3:8b", Runtime: "ollama", Source: "ollama-ps", WarmthScore: 1.0},
+	}
+
+	reqs := models.TaskRequirements{
+		RequiredTools: []string{"ollama"},
+		MinFreeRAMMB:  1024,
+		Workload:      models.WorkloadProfileMatch{Class: models.ClassLocalLLMInference},
+	}
+
+	ranked := RankCandidates([]models.NodeFacts{alpha, beta}, reqs, nil)
+	if ranked[0].Name != "beta" {
+		t.Fatalf("expected warm beta to win on warmth tiebreaker, got %s", ranked[0].Name)
+	}
+}
+
+// TestRankCandidatesWarmthFilteredBeforeRanking verifies the v2 safety
+// invariant: warmth is never consulted on a node that fails FilterCandidates
+// due to RAM shortfall.
+func TestRankCandidatesWarmthFilteredBeforeRanking(t *testing.T) {
+	// Hot model, but only 100MB free → fails the 1GB filter.
+	hot := nodeComplete("hot-tiny", 100, "none", "ollama")
+	hot.Resources.RAMTotalMB = 4096 // keep total small so reservable is small too
+	hot.Ollama = &models.OllamaInfo{Installed: true, Running: true}
+	hot.ResidentModels = []models.ResidentModel{
+		{Name: "llama3:8b", Runtime: "ollama", Source: "ollama-ps", WarmthScore: 1.0},
+	}
+
+	cold := nodeComplete("cold-large", 8000, "none", "ollama")
+	cold.Ollama = &models.OllamaInfo{Installed: true, Running: true}
+
+	reqs := models.TaskRequirements{
+		RequiredTools: []string{"ollama"},
+		MinFreeRAMMB:  1024,
+		Workload:      models.WorkloadProfileMatch{Class: models.ClassLocalLLMInference},
+	}
+
+	filtered := FilterCandidates(reqs, []models.NodeFacts{hot, cold}, nil)
+	if len(filtered) != 1 || filtered[0].Name != "cold-large" {
+		t.Fatalf("expected FilterCandidates to drop hot-tiny (insufficient RAM), got %v", names(filtered))
+	}
+}
+
+// TestWarmthToRankBoundaries pins the bucket boundaries: cold (0), warm
+// (>0.5), hot (>0.9). Exact thresholds must behave predictably.
+func TestWarmthToRankBoundaries(t *testing.T) {
+	cases := []struct {
+		score float64
+		want  int
+	}{
+		{-0.1, 0}, // negative → cold
+		{0.0, 0},  // zero → cold
+		{0.5, 0},  // exactly threshold → cold (not >)
+		{0.51, 1}, // just above → warm
+		{0.9, 1},  // exactly threshold → warm (not >)
+		{0.91, 2}, // just above → hot
+		{1.0, 2},  // max → hot
+		{2.0, 2},  // above 1 (defensive) → hot
+	}
+	for _, c := range cases {
+		got := warmthToRank(c.score)
+		if got != c.want {
+			t.Errorf("warmthToRank(%v) = %d, want %d", c.score, got, c.want)
+		}
+	}
+}
+
+// TestModelWarmthRankPicksHighestRelevant verifies that when a node has
+// multiple relevant resident models, the highest warmth wins.
+func TestModelWarmthRankPicksHighestRelevant(t *testing.T) {
+	n := nodeComplete("n", 8000, "none", "ollama")
+	n.Ollama = &models.OllamaInfo{Installed: true, Running: true}
+	n.ResidentModels = []models.ResidentModel{
+		{Name: "llama3:8b", Runtime: "ollama", Source: "ollama-ps", WarmthScore: 0.0},
+		{Name: "qwen2:7b", Runtime: "ollama", Source: "ollama-ps", WarmthScore: 0.6},
+	}
+	reqs := models.TaskRequirements{
+		RequiredTools: []string{"ollama"},
+		Workload:      models.WorkloadProfileMatch{Class: models.ClassLocalLLMInference},
+	}
+	if got := modelWarmthRank(n, reqs); got != 1 {
+		t.Fatalf("expected rank 1 (warm) from best of {0.0, 0.6}, got %d", got)
+	}
+}
+
+// TestModelWarmthRankIgnoresOtherRuntimes verifies that warmth on a
+// non-relevant runtime (e.g. llama.cpp) does not affect an ollama task's
+// ranking — only ollama resident models count.
+func TestModelWarmthRankIgnoresOtherRuntimes(t *testing.T) {
+	n := nodeComplete("n", 8000, "none", "ollama")
+	n.Ollama = &models.OllamaInfo{Installed: true, Running: true}
+	// Resident model is llama.cpp, but task requires ollama. The warmth
+	// on the llama.cpp entry must be ignored.
+	n.ResidentModels = []models.ResidentModel{
+		{Name: "llama3:8b", Runtime: "llama.cpp", Source: "proc-cmdline", WarmthScore: 1.0},
+	}
+	reqs := models.TaskRequirements{
+		RequiredTools: []string{"ollama"},
+		Workload:      models.WorkloadProfileMatch{Class: models.ClassLocalLLMInference},
+	}
+	if got := modelWarmthRank(n, reqs); got != 0 {
+		t.Fatalf("expected rank 0 (no relevant model), got %d", got)
+	}
+}
+
+// TestApplyOllamaWarmthTimeZero verifies the fact-layer helper: when
+// ExpiresAt is zero, WarmthScore stays zero. This is the "older Ollama
+// or no keep_alive" graceful-degradation path.
+func TestApplyOllamaWarmthTimeZero(t *testing.T) {
+	rms := []models.ResidentModel{
+		{Name: "m1", Runtime: "ollama", Source: "ollama-ps"},
+	}
+	info := &models.OllamaInfo{Installed: true}
+	facts.ApplyOllamaWarmth(info, rms)
+	if rms[0].WarmthScore != 0 {
+		t.Fatalf("expected WarmthScore=0 for zero ExpiresAt, got %v", rms[0].WarmthScore)
+	}
+}
+
+// TestApplyOllamaWarmthInFuturePopulates verifies that a future ExpiresAt
+// yields a non-zero WarmthScore.
+func TestApplyOllamaWarmthInFuturePopulates(t *testing.T) {
+	rms := []models.ResidentModel{
+		{Name: "m1", Runtime: "ollama", Source: "ollama-ps", ExpiresAt: time.Now().Add(2 * time.Minute)},
+	}
+	info := &models.OllamaInfo{Installed: true, DefaultKeepAlive: "5m"}
+	facts.ApplyOllamaWarmth(info, rms)
+	if rms[0].WarmthScore <= 0 {
+		t.Fatalf("expected positive WarmthScore, got %v", rms[0].WarmthScore)
+	}
+	if rms[0].WarmthScore > 1 {
+		t.Fatalf("expected WarmthScore ≤ 1, got %v", rms[0].WarmthScore)
+	}
+}
+
+// TestApplyOllamaWarmthPastExpiresAtIsCold verifies that an already-expired
+// ExpiresAt is treated as cold (WarmthScore=0), not negative.
+func TestApplyOllamaWarmthPastExpiresAtIsCold(t *testing.T) {
+	rms := []models.ResidentModel{
+		{Name: "m1", Runtime: "ollama", Source: "ollama-ps", ExpiresAt: time.Now().Add(-1 * time.Minute)},
+	}
+	info := &models.OllamaInfo{Installed: true, DefaultKeepAlive: "5m"}
+	facts.ApplyOllamaWarmth(info, rms)
+	if rms[0].WarmthScore != 0 {
+		t.Fatalf("expected WarmthScore=0 for past ExpiresAt, got %v", rms[0].WarmthScore)
+	}
+}
+
+// TestDefaultOllamaKeepAliveFallbacks verifies the helper resolves 5m when
+// info is nil or DefaultKeepAlive is empty, unparseable, negative, or zero.
+func TestDefaultOllamaKeepAliveFallbacks(t *testing.T) {
+	cases := []struct {
+		name string
+		info *models.OllamaInfo
+	}{
+		{"nil", nil},
+		{"empty", &models.OllamaInfo{DefaultKeepAlive: ""}},
+		{"garbage", &models.OllamaInfo{DefaultKeepAlive: "not-a-duration"}},
+		{"negative", &models.OllamaInfo{DefaultKeepAlive: "-30s"}},
+		{"zero", &models.OllamaInfo{DefaultKeepAlive: "0s"}},
+	}
+	for _, c := range cases {
+		got := facts.DefaultOllamaKeepAlive(c.info)
+		if got != 5*time.Minute {
+			t.Errorf("%s: expected 5m fallback, got %v", c.name, got)
+		}
+	}
+}
+
+// TestDefaultOllamaKeepAliveParses verifies the helper accepts valid
+// duration strings and returns them unchanged.
+func TestDefaultOllamaKeepAliveParses(t *testing.T) {
+	info := &models.OllamaInfo{DefaultKeepAlive: "1h"}
+	if got := facts.DefaultOllamaKeepAlive(info); got != time.Hour {
+		t.Fatalf("expected 1h, got %v", got)
+	}
+	info.DefaultKeepAlive = "30s"
+	if got := facts.DefaultOllamaKeepAlive(info); got != 30*time.Second {
+		t.Fatalf("expected 30s, got %v", got)
+	}
+}

From 9ff97d619a08b1d0748f78770d0662f15dac1337 Mon Sep 17 00:00:00 2001
From: AXIS Contributor <axis-dev@example.invalid>
Date: Tue, 2 Jun 2026 12:40:01 -0400
Subject: [PATCH 2/2] fix(facts): make Ollama discovery script and keep-alive
 duration parsing robust

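Make the Ollama resident-model discovery script more robust: accept a top-level JSON object (with a "models" list) as well as a bare list, skip non-object entries, and convert size_vram to an integer safely when it arrives as a string or another non-integer value.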

Handle bare-integer keep-alive strings in DefaultOllamaKeepAlive by treating them as seconds: surrounding whitespace is trimmed and an "s" unit is appended before time.ParseDuration runs.

Addresses review comments from gemini-code-assist[bot] on PR 151.

Co-Authored-By: Antigravity <noreply@gemini.google.com>
---
 internal/facts/local.go           | 12 ++++++----
 internal/facts/tools.go           | 40 +++++++++++++++++++------------
 internal/placement/warmth_test.go | 24 +++++++++++++------
 3 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/internal/facts/local.go b/internal/facts/local.go
index 0c9e195..30ad05d 100644
--- a/internal/facts/local.go
+++ b/internal/facts/local.go
@@ -1049,13 +1049,15 @@ func DefaultOllamaKeepAlive(info *models.OllamaInfo) time.Duration {
 	if info == nil {
 		return fallback
 	}
-	if info.DefaultKeepAlive == "" {
+	val := strings.TrimSpace(info.DefaultKeepAlive)
+	if val == "" {
 		return fallback
 	}
-	// `time.ParseDuration` accepts "5m", "1h30m", "-30s", etc. It does
-	// not accept a bare integer (seconds) — Ollama emits e.g. "5m" so
-	// this is fine. Negative durations are clamped to fallback.
-	d, err := time.ParseDuration(info.DefaultKeepAlive)
+	// If it's a bare integer (seconds), append "s" so ParseDuration can parse it.
+	if _, err := strconv.Atoi(val); err == nil {
+		val += "s"
+	}
+	d, err := time.ParseDuration(val)
 	if err != nil || d <= 0 {
 		return fallback
 	}
diff --git a/internal/facts/tools.go b/internal/facts/tools.go
index c60803d..3db723b 100644
--- a/internal/facts/tools.go
+++ b/internal/facts/tools.go
@@ -47,26 +47,41 @@ const OllamaDiscoveryScript = `set -o pipefail;
 		# parser when the JSON is unavailable (older Ollama).
 		PS_JSON=$($OLLAMA_BIN ps -qq 2>/dev/null || echo "")
 		if [ -n "$PS_JSON" ]; then
-			RESIDENT=$(printf '%s' "$PS_JSON" | python3 - 2>/dev/null <<'PYEOF' || echo ""
+			RESIDENT=$(python3 - "$PS_JSON" 2>/dev/null <<'PYEOF' || echo ""
 import json, sys
 try:
-    entries = json.loads(sys.stdin.read() or "")
+    # python3 reads this script from stdin (the heredoc), which replaces any
+    # pipe, so the probe JSON is passed as argv[1] rather than piped in.
+    data = json.loads((sys.argv[1] if len(sys.argv) > 1 else "") or "[]")
+    entries = data.get("models", data) if isinstance(data, dict) else data
+    if not isinstance(entries, list):
+        entries = []
+    out = []
+    for e in entries:
+        if not isinstance(e, dict):
+            continue
+        name = e.get("name", "")
+        if not name:
+            continue
+        vram = e.get("size_vram")
+        try:
+            vram_val = int(vram) if vram is not None else 0
+        except (ValueError, TypeError):
+            vram_val = 0
+        out.append(json.dumps({
+            "name": name,
+            "runtime": "ollama",
+            "processor": e.get("processor", "gpu"),
+            "size_vram_mb": vram_val // (1024*1024),
+            "source": "ollama-ps",
+            # Emit null rather than "" when absent: the time.Time JSON
+            # decoder in Go rejects an empty string, which would fail the
+            # whole discovery payload.
+            "expires_at": e.get("expires_at") or None,
+        }))
+    print(",".join(out))
 except Exception:
     sys.exit(0)
-out = []
-for e in entries:
-    name = e.get("name", "")
-    if not name:
-        continue
-    out.append(json.dumps({
-        "name": name,
-        "runtime": "ollama",
-        "processor": e.get("processor", "gpu"),
-        "size_vram_mb": int((e.get("size_vram") or 0) // (1024*1024)),
-        "source": "ollama-ps",
-        "expires_at": e.get("expires_at", ""),
-    }))
-print(",".join(out))
 PYEOF
 )
 		else
diff --git a/internal/placement/warmth_test.go b/internal/placement/warmth_test.go
index 079ccdd..785c2d7 100644
--- a/internal/placement/warmth_test.go
+++ b/internal/placement/warmth_test.go
@@ -218,14 +218,24 @@ func TestDefaultOllamaKeepAliveFallbacks(t *testing.T) {
 }
 
 // TestDefaultOllamaKeepAliveParses verifies the helper accepts valid
-// duration strings and returns them unchanged.
+// duration strings and bare integers representing seconds, and returns
+// them parsed correctly.
 func TestDefaultOllamaKeepAliveParses(t *testing.T) {
-	info := &models.OllamaInfo{DefaultKeepAlive: "1h"}
-	if got := facts.DefaultOllamaKeepAlive(info); got != time.Hour {
-		t.Fatalf("expected 1h, got %v", got)
+	cases := []struct {
+		input    string
+		expected time.Duration
+	}{
+		{"1h", time.Hour},
+		{"30s", 30 * time.Second},
+		{"300", 5 * time.Minute},
+		{"1200", 20 * time.Minute},
+		{"30", 30 * time.Second},
+		{" 600  ", 10 * time.Minute},
 	}
-	info.DefaultKeepAlive = "30s"
-	if got := facts.DefaultOllamaKeepAlive(info); got != 30*time.Second {
-		t.Fatalf("expected 30s, got %v", got)
+	for _, c := range cases {
+		info := &models.OllamaInfo{DefaultKeepAlive: c.input}
+		if got := facts.DefaultOllamaKeepAlive(info); got != c.expected {
+			t.Errorf("input %q: expected %v, got %v", c.input, c.expected, got)
+		}
 	}
 }