From 5ec79538c1e4c2f7882dceb74f8f3bfbaa417e1a Mon Sep 17 00:00:00 2001 From: AXIS Contributor Date: Tue, 2 Jun 2026 06:25:36 -0400 Subject: [PATCH 1/2] Add Ollama warmth lifetime scoring as bounded placement tiebreaker Promote the resident-model 'is loaded' boolean into a continuous 0.0-1.0 warmth score derived from Ollama's /api/ps expires_at and default_keep_alive. Warmth becomes a bounded tiebreaker at position 10 of the rank comparator, after RAM, GPU, and pressure, and ahead of reservation ratio. FilterCandidates is unchanged: warmth is consulted only among nodes that already passed eval.Eligible(). It cannot promote an undersized node, and the three-bucket discretization (cold/warm/hot at 0.5 and 0.9) keeps ranking stable. Probe layer reads the new fields when Ollama 0.3.10+ is present ('ollama ps -qq' JSON path), and degrades gracefully to the existing awk parser on older Ollama - no expires_at is emitted in that case and WarmthScore remains 0 (cold). /api/ps is also queried for default_keep_alive, falling back to 5m when missing or unparseable. Adds ResidentModel.ExpiresAt, ResidentModel.WarmthScore, and OllamaInfo.DefaultKeepAlive (all omitempty, additive JSON), plus ApplyOllamaWarmth / DefaultOllamaKeepAlive helpers in the facts layer and modelWarmthRank in the ranker. Tests cover: warmth loses to allocatable RAM, warmth breaks ties on equal RAM, warmth is ignored when FilterCandidates rejects, boundary cases (0, 0.5, 0.51, 0.9, 0.91, 1.0), highest-relevant wins, other-runtime warmth is ignored, and time math for zero / future / past ExpiresAt. 
--- internal/facts/local.go | 62 ++++++++ internal/facts/tools.go | 46 +++++- internal/models/types.go | 26 +++- internal/placement/empirical.go | 36 +++++ internal/placement/ranker.go | 6 + internal/placement/warmth_test.go | 231 ++++++++++++++++++++++++++++++ 6 files changed, 404 insertions(+), 3 deletions(-) create mode 100644 internal/placement/warmth_test.go diff --git a/internal/facts/local.go b/internal/facts/local.go index 525c8ce..0c9e195 100644 --- a/internal/facts/local.go +++ b/internal/facts/local.go @@ -995,11 +995,73 @@ func discoverOllamaLocal(ctx context.Context) (models.OllamaInfo, []models.Resid // parse the JSON blob var parsed ollamaDiscoveryPayload if json.Unmarshal(out, &parsed) == nil { + ApplyOllamaWarmth(&parsed.OllamaInfo, parsed.ResidentModels) return parsed.OllamaInfo, parsed.ResidentModels } return info, nil } +// ApplyOllamaWarmth populates WarmthScore for each ResidentModel from the +// ExpiresAt already decoded from the probe payload. The Ollama probe emits an `expires_at` field +// per resident model and a process-level `default_keep_alive` duration +// (Ollama 0.3.10+). Warmth is a continuous score in [0, 1] computed as +// remaining / total, where total falls back to 5m (Ollama's stock default) +// when `default_keep_alive` is absent or unparseable. When `expires_at` is +// missing or already past, WarmthScore is 0 (cold). Both fields are +// advisory metadata only — placement consumes them as a bounded +// tiebreaker via modelWarmthRank (internal/placement/empirical.go). +// +// Exported for testability from internal/placement and from +// internal/facts tests. 
+func ApplyOllamaWarmth(info *models.OllamaInfo, rms []models.ResidentModel) { + if len(rms) == 0 { + return + } + now := time.Now() + total := DefaultOllamaKeepAlive(info) + for i := range rms { + rm := &rms[i] + if rm.ExpiresAt.IsZero() { + continue + } + if !rm.ExpiresAt.After(now) { + rm.WarmthScore = 0 + continue + } + remaining := rm.ExpiresAt.Sub(now) + score := float64(remaining) / float64(total) + if score < 0 { + score = 0 + } + if score > 1 { + score = 1 + } + rm.WarmthScore = score + } +} + +// DefaultOllamaKeepAlive resolves the process-level default_keep_alive +// duration from an Ollama /api/ps payload, falling back to 5m (Ollama's +// stock default since 0.3.10) when the field is absent or unparseable. +// Returns a positive duration on success. Exported for testability. +func DefaultOllamaKeepAlive(info *models.OllamaInfo) time.Duration { + const fallback = 5 * time.Minute + if info == nil { + return fallback + } + if info.DefaultKeepAlive == "" { + return fallback + } + // `time.ParseDuration` accepts "5m", "1h30m", "-30s", etc. It does + // not accept a bare integer (seconds) — Ollama emits e.g. "5m" so + // this is fine. Negative durations are clamped to fallback. + d, err := time.ParseDuration(info.DefaultKeepAlive) + if err != nil || d <= 0 { + return fallback + } + return d +} + // discoverLlamaServerLocal probes for a running llama-server process and // returns its resident models. Returns nil if llama-server is not installed or // not running. 
diff --git a/internal/facts/tools.go b/internal/facts/tools.go index 2a53426..c60803d 100644 --- a/internal/facts/tools.go +++ b/internal/facts/tools.go @@ -40,13 +40,55 @@ const OllamaDiscoveryScript = `set -o pipefail; LISTENING=true fi GPU=$($OLLAMA_BIN ps 2>/dev/null | grep -o 'gpu:[^ ]*' | head -1) - RESIDENT=$($OLLAMA_BIN ps 2>/dev/null | awk 'NR>1 && NF { proc=""; size_mb=0; for(i=1;i<=NF;i++){if($i~/[0-9]+%/){proc=$i" "$(i+1)} if(($i=="GB"||$i=="GiB")&&i>1&&($(i-1)+0)>0){size_mb=int($(i-1)*1024+0.5)} if(($i=="MB"||$i=="MiB")&&i>1&&($(i-1)+0)>0){size_mb=int($(i-1)+0.5)}} gsub(/"/, "\\\"", proc); printf "%s{\"name\":\"%s\",\"runtime\":\"ollama\",\"processor\":\"%s\",\"size_vram_mb\":%d,\"source\":\"ollama-ps\"}", (n++ ? "," : ""), $1, proc, size_mb }') + # 'ollama ps -qq' (added in Ollama 0.3.10) emits JSON: each entry + # includes name, expires_at (RFC3339) and size_vram. Parse it + # with python3 (always present on nodes with ollama) and emit + # one JSON object per model. Falls back to the existing awk + # parser when the JSON is unavailable (older Ollama). + PS_JSON=$($OLLAMA_BIN ps -qq 2>/dev/null || echo "") + if [ -n "$PS_JSON" ]; then + RESIDENT=$(printf '%s' "$PS_JSON" | python3 - 2>/dev/null <<'PYEOF' || echo "" +import json, sys +try: + entries = json.loads(sys.stdin.read() or "") +except Exception: + sys.exit(0) +out = [] +for e in entries: + name = e.get("name", "") + if not name: + continue + out.append(json.dumps({ + "name": name, + "runtime": "ollama", + "processor": e.get("processor", "gpu"), + "size_vram_mb": int((e.get("size_vram") or 0) // (1024*1024)), + "source": "ollama-ps", + "expires_at": e.get("expires_at", ""), + })) +print(",".join(out)) +PYEOF +) + else + RESIDENT="" + fi + if [ -z "$RESIDENT" ]; then + # Fallback: original awk parser (older Ollama, no 'ps -qq'). + # No expires_at field is emitted by this path; the local + # parser will leave ExpiresAt zero and WarmthScore at 0. 
+ RESIDENT=$($OLLAMA_BIN ps 2>/dev/null | awk 'NR>1 && NF { proc=""; size_mb=0; for(i=1;i<=NF;i++){if($i~/[0-9]+%/){proc=$i" "$(i+1)} if(($i=="GB"||$i=="GiB")&&i>1&&($(i-1)+0)>0){size_mb=int($(i-1)*1024+0.5)} if(($i=="MB"||$i=="MiB")&&i>1&&($(i-1)+0)>0){size_mb=int($(i-1)+0.5)}} gsub(/"/, "\\\"", proc); printf "%s{\"name\":\"%s\",\"runtime\":\"ollama\",\"processor\":\"%s\",\"size_vram_mb\":%d,\"source\":\"ollama-ps\"}", (n++ ? "," : ""), $1, proc, size_mb }') + fi if [ -n "$RESIDENT" ]; then RESIDENT="[$RESIDENT]" else RESIDENT="[]" fi - echo "{\"installed\":true,\"path\":\"$OLLAMA_BIN\",\"version\":\"${VERSION:-unknown}\",\"running\":$( [ -n \"$PGREP\" ] && echo true || echo false ),\"listening\":$LISTENING,\"port\":11434,\"models\":$MODELS,\"resident_models\":$RESIDENT,\"gpu_offload\":\"${GPU:-none}\"}" + # Process-level default_keep_alive (added in Ollama 0.3.10). Read + # from /api/ps; tolerate older Ollama (or versions that omit + # the field) by emitting an empty string. Treat null and any + # failure to parse as empty. 
+ KEEPALIVE=$(curl -s --max-time 2 http://127.0.0.1:11434/api/ps 2>/dev/null | python3 -c "import sys,json; d=json.load(sys.stdin); v=d.get('default_keep_alive'); print('' if v is None else v)" 2>/dev/null || echo "") + echo "{\"installed\":true,\"path\":\"$OLLAMA_BIN\",\"version\":\"${VERSION:-unknown}\",\"running\":$( [ -n \"$PGREP\" ] && echo true || echo false ),\"listening\":$LISTENING,\"port\":11434,\"models\":$MODELS,\"resident_models\":$RESIDENT,\"gpu_offload\":\"${GPU:-none}\",\"default_keep_alive\":\"${KEEPALIVE}\"}" ` // LlamaServerDiscoveryScript is the bash script used to detect a running diff --git a/internal/models/types.go b/internal/models/types.go index 2d830c1..d42d41d 100644 --- a/internal/models/types.go +++ b/internal/models/types.go @@ -171,17 +171,41 @@ type OllamaInfo struct { Port int `json:"port,omitempty" yaml:"port,omitempty"` Models []string `json:"models,omitempty" yaml:"models,omitempty"` GPUOffload string `json:"gpu_offload,omitempty" yaml:"gpu_offload,omitempty"` - Error string `json:"error,omitempty" yaml:"error,omitempty"` + // DefaultKeepAlive is the process-level Ollama default keep-alive + // duration string (e.g. "5m", "1h"). Populated from /api/ps on + // Ollama 0.3.10+; empty when unknown or on older Ollama. The warmth + // computation in internal/facts/local.go (ApplyOllamaWarmth) parses + // this and falls back to 5m when empty or unparseable. + DefaultKeepAlive string `json:"default_keep_alive,omitempty" yaml:"default_keep_alive,omitempty"` + Error string `json:"error,omitempty" yaml:"error,omitempty"` } // ResidentModel is additive truth-plane metadata describing a model that is // currently resident in a node runtime according to a live probe. +// +// ExpiresAt and WarmthScore are populated from Ollama's /api/ps +// `expires_at` and `default_keep_alive` fields (Ollama 0.3.10+). 
They are +// optional: when absent (older Ollama, no `keep_alive`, or other runtimes +// such as llama-server / mlx_lm.server), both fields remain zero and +// WarmthScore is treated as cold. The fields are advisory metadata only; +// placement uses them as a bounded tiebreaker, never as a primary signal +// (see internal/placement/ranker.go modelWarmthRank). type ResidentModel struct { Name string `json:"name" yaml:"name"` Runtime string `json:"runtime,omitempty" yaml:"runtime,omitempty"` Processor string `json:"processor,omitempty" yaml:"processor,omitempty"` Source string `json:"source,omitempty" yaml:"source,omitempty"` SizeVRAMMB int64 `json:"size_vram_mb,omitempty" yaml:"size_vram_mb,omitempty"` // 0 = unknown/not reported by the runtime; currently populated only by the Ollama probe + + // ExpiresAt is the wall-clock time at which the model is expected to + // be unloaded by the runtime. Zero when unknown. + ExpiresAt time.Time `json:"expires_at,omitempty" yaml:"expires_at,omitempty"` + + // WarmthScore is a continuous 0.0–1.0 measure of how recently the + // model was loaded, derived from ExpiresAt and the runtime's + // default_keep_alive. 1.0 = freshly loaded, 0.0 = expired or unknown. + // Always non-negative; clamped to [0, 1] at compute time. + WarmthScore float64 `json:"warmth_score,omitempty" yaml:"warmth_score,omitempty"` } // TurboQuantInfo records whether a node appears able to run a TurboQuant-like diff --git a/internal/placement/empirical.go b/internal/placement/empirical.go index 3b67036..19c5913 100644 --- a/internal/placement/empirical.go +++ b/internal/placement/empirical.go @@ -206,10 +206,46 @@ func relevantResidentModels(n models.NodeFacts, reqs models.TaskRequirements) [] return relevant } +// residentModelRank returns a higher score for nodes with a relevant +// resident model already loaded. 
Currently a count; reserved for +// future qualitative scoring (warmth is layered on as a separate +// modelWarmthRank tiebreaker in ranker.go, not folded into this). func residentModelRank(n models.NodeFacts, reqs models.TaskRequirements) int { return len(relevantResidentModels(n, reqs)) } +// modelWarmthRank returns a bounded rank ∈ {0, 1, 2} for the warmth +// of the relevant resident model on a node: 0 = cold (unknown, expired, +// or zero), 1 = warm (>0.5), 2 = hot (>0.9). When no relevant model is +// loaded, returns 0. Used as a tiebreaker at position 10 in +// RankCandidates — never a primary signal. +func modelWarmthRank(n models.NodeFacts, reqs models.TaskRequirements) int { + relevant := relevantResidentModels(n, reqs) + if len(relevant) == 0 { + return 0 + } + best := 0.0 + for _, m := range relevant { + if m.WarmthScore > best { + best = m.WarmthScore + } + } + return warmthToRank(best) +} + +// warmthToRank maps a continuous [0, 1] score to a 0/1/2 rank. +// Boundaries: 0 = cold, 1 = warm (>0.5), 2 = hot (>0.9). 
+func warmthToRank(score float64) int { + switch { + case score > 0.9: + return 2 + case score > 0.5: + return 1 + default: + return 0 + } +} + func residentModelReason(n models.NodeFacts, reqs models.TaskRequirements) string { modelsForReq := relevantResidentModels(n, reqs) if len(modelsForReq) == 0 { diff --git a/internal/placement/ranker.go b/internal/placement/ranker.go index 25b24b1..a069416 100644 --- a/internal/placement/ranker.go +++ b/internal/placement/ranker.go @@ -81,6 +81,7 @@ func RankCandidates(candidates []models.NodeFacts, reqs models.TaskRequirements, turboQuantRank int unifiedMemoryRank int pressureRank int + modelWarmthRank int reservationRatio float64 clusterReservationShare float64 } @@ -132,6 +133,7 @@ func RankCandidates(candidates []models.NodeFacts, reqs models.TaskRequirements, turboQuantRank: turboQuantRank(n), unifiedMemoryRank: unifiedMemoryRank(n, reqs), pressureRank: pressureRank(pressureOf(n)), + modelWarmthRank: modelWarmthRank(n, reqs), reservationRatio: reservationRatio(n), clusterReservationShare: share, } @@ -176,6 +178,10 @@ func RankCandidates(candidates []models.NodeFacts, reqs models.TaskRequirements, return keys[i].pressureRank < keys[j].pressureRank } + if keys[i].modelWarmthRank != keys[j].modelWarmthRank { + return keys[i].modelWarmthRank > keys[j].modelWarmthRank + } + if keys[i].reservationRatio != keys[j].reservationRatio { return keys[i].reservationRatio < keys[j].reservationRatio } diff --git a/internal/placement/warmth_test.go b/internal/placement/warmth_test.go new file mode 100644 index 0000000..079ccdd --- /dev/null +++ b/internal/placement/warmth_test.go @@ -0,0 +1,231 @@ +package placement + +import ( + "testing" + "time" + + "github.com/toasterbook88/axis/internal/facts" + "github.com/toasterbook88/axis/internal/models" +) + +// TestRankCandidatesWarmthLosesToAllocatableRAM verifies the v2 critical-fix +// invariant: warmth is a bounded tiebreaker, never a primary signal. 
A small +// node with a hot model must not outrank a large node with a cold model. +func TestRankCandidatesWarmthLosesToAllocatableRAM(t *testing.T) { + // 4GB total, 2GB free → only just passes a 1GB requirement. + // Hot ollama model loaded (warmth=1.0). + hot := nodeComplete("hot-small", 2000, "none", "ollama") + hot.Ollama = &models.OllamaInfo{Installed: true, Running: true} + hot.ResidentModels = []models.ResidentModel{ + {Name: "llama3:8b", Runtime: "ollama", Source: "ollama-ps", WarmthScore: 1.0}, + } + + // 16GB total, 14GB free, no resident model. + cold := nodeComplete("cold-large", 14000, "none", "ollama") + cold.Ollama = &models.OllamaInfo{Installed: true, Running: true} + + reqs := models.TaskRequirements{ + RequiredTools: []string{"ollama"}, + MinFreeRAMMB: 1024, + Workload: models.WorkloadProfileMatch{Class: models.ClassLocalLLMInference}, + } + + ranked := RankCandidates([]models.NodeFacts{hot, cold}, reqs, nil) + if ranked[0].Name != "cold-large" { + t.Fatalf("expected cold-large to win on allocatable RAM regardless of warmth, got %s", ranked[0].Name) + } +} + +// TestRankCandidatesWarmthBreaksTieOnEqualAllocatableRAM verifies the v2 +// bounded-tiebreaker behavior: two equally-RAM-eligible nodes differ only on +// warmth, and the warmer node wins. 
+func TestRankCandidatesWarmthBreaksTieOnEqualAllocatableRAM(t *testing.T) { + alpha := nodeComplete("alpha", 8000, "none", "ollama") + alpha.Ollama = &models.OllamaInfo{Installed: true, Running: true} + alpha.ResidentModels = []models.ResidentModel{ + {Name: "llama3:8b", Runtime: "ollama", Source: "ollama-ps", WarmthScore: 0.0}, + } + + beta := nodeComplete("beta", 8000, "none", "ollama") + beta.Ollama = &models.OllamaInfo{Installed: true, Running: true} + beta.ResidentModels = []models.ResidentModel{ + {Name: "llama3:8b", Runtime: "ollama", Source: "ollama-ps", WarmthScore: 1.0}, + } + + reqs := models.TaskRequirements{ + RequiredTools: []string{"ollama"}, + MinFreeRAMMB: 1024, + Workload: models.WorkloadProfileMatch{Class: models.ClassLocalLLMInference}, + } + + ranked := RankCandidates([]models.NodeFacts{alpha, beta}, reqs, nil) + if ranked[0].Name != "beta" { + t.Fatalf("expected warm beta to win on warmth tiebreaker, got %s", ranked[0].Name) + } +} + +// TestRankCandidatesWarmthFilteredBeforeRanking verifies the v2 safety +// invariant: warmth is never consulted on a node that fails FilterCandidates +// due to RAM shortfall. +func TestRankCandidatesWarmthFilteredBeforeRanking(t *testing.T) { + // Hot model, but only 100MB free → fails the 1GB filter. 
+ hot := nodeComplete("hot-tiny", 100, "none", "ollama") + hot.Resources.RAMTotalMB = 4096 // keep total small so reservable is small too + hot.Ollama = &models.OllamaInfo{Installed: true, Running: true} + hot.ResidentModels = []models.ResidentModel{ + {Name: "llama3:8b", Runtime: "ollama", Source: "ollama-ps", WarmthScore: 1.0}, + } + + cold := nodeComplete("cold-large", 8000, "none", "ollama") + cold.Ollama = &models.OllamaInfo{Installed: true, Running: true} + + reqs := models.TaskRequirements{ + RequiredTools: []string{"ollama"}, + MinFreeRAMMB: 1024, + Workload: models.WorkloadProfileMatch{Class: models.ClassLocalLLMInference}, + } + + filtered := FilterCandidates(reqs, []models.NodeFacts{hot, cold}, nil) + if len(filtered) != 1 || filtered[0].Name != "cold-large" { + t.Fatalf("expected FilterCandidates to drop hot-tiny (insufficient RAM), got %v", names(filtered)) + } +} + +// TestWarmthToRankBoundaries pins the bucket boundaries: cold (0), warm +// (>0.5), hot (>0.9). Exact thresholds must behave predictably. +func TestWarmthToRankBoundaries(t *testing.T) { + cases := []struct { + score float64 + want int + }{ + {-0.1, 0}, // negative → cold + {0.0, 0}, // zero → cold + {0.5, 0}, // exactly threshold → cold (not >) + {0.51, 1}, // just above → warm + {0.9, 1}, // exactly threshold → warm (not >) + {0.91, 2}, // just above → hot + {1.0, 2}, // max → hot + {2.0, 2}, // above 1 (defensive) → hot + } + for _, c := range cases { + got := warmthToRank(c.score) + if got != c.want { + t.Errorf("warmthToRank(%v) = %d, want %d", c.score, got, c.want) + } + } +} + +// TestModelWarmthRankPicksHighestRelevant verifies that when a node has +// multiple relevant resident models, the highest warmth wins. 
+func TestModelWarmthRankPicksHighestRelevant(t *testing.T) { + n := nodeComplete("n", 8000, "none", "ollama") + n.Ollama = &models.OllamaInfo{Installed: true, Running: true} + n.ResidentModels = []models.ResidentModel{ + {Name: "llama3:8b", Runtime: "ollama", Source: "ollama-ps", WarmthScore: 0.0}, + {Name: "qwen2:7b", Runtime: "ollama", Source: "ollama-ps", WarmthScore: 0.6}, + } + reqs := models.TaskRequirements{ + RequiredTools: []string{"ollama"}, + Workload: models.WorkloadProfileMatch{Class: models.ClassLocalLLMInference}, + } + if got := modelWarmthRank(n, reqs); got != 1 { + t.Fatalf("expected rank 1 (warm) from best of {0.0, 0.6}, got %d", got) + } +} + +// TestModelWarmthRankIgnoresOtherRuntimes verifies that warmth on a +// non-relevant runtime (e.g. llama.cpp) does not affect an ollama task's +// ranking — only ollama resident models count. +func TestModelWarmthRankIgnoresOtherRuntimes(t *testing.T) { + n := nodeComplete("n", 8000, "none", "ollama") + n.Ollama = &models.OllamaInfo{Installed: true, Running: true} + // Resident model is llama.cpp, but task requires ollama. The warmth + // on the llama.cpp entry must be ignored. + n.ResidentModels = []models.ResidentModel{ + {Name: "llama3:8b", Runtime: "llama.cpp", Source: "proc-cmdline", WarmthScore: 1.0}, + } + reqs := models.TaskRequirements{ + RequiredTools: []string{"ollama"}, + Workload: models.WorkloadProfileMatch{Class: models.ClassLocalLLMInference}, + } + if got := modelWarmthRank(n, reqs); got != 0 { + t.Fatalf("expected rank 0 (no relevant model), got %d", got) + } +} + +// TestApplyOllamaWarmthTimeZero verifies the fact-layer helper: when +// ExpiresAt is zero, WarmthScore stays zero. This is the "older Ollama +// or no keep_alive" graceful-degradation path. 
+func TestApplyOllamaWarmthTimeZero(t *testing.T) { + rms := []models.ResidentModel{ + {Name: "m1", Runtime: "ollama", Source: "ollama-ps"}, + } + info := &models.OllamaInfo{Installed: true} + facts.ApplyOllamaWarmth(info, rms) + if rms[0].WarmthScore != 0 { + t.Fatalf("expected WarmthScore=0 for zero ExpiresAt, got %v", rms[0].WarmthScore) + } +} + +// TestApplyOllamaWarmthInFuturePopulates verifies that a future ExpiresAt +// yields a non-zero WarmthScore. +func TestApplyOllamaWarmthInFuturePopulates(t *testing.T) { + rms := []models.ResidentModel{ + {Name: "m1", Runtime: "ollama", Source: "ollama-ps", ExpiresAt: time.Now().Add(2 * time.Minute)}, + } + info := &models.OllamaInfo{Installed: true, DefaultKeepAlive: "5m"} + facts.ApplyOllamaWarmth(info, rms) + if rms[0].WarmthScore <= 0 { + t.Fatalf("expected positive WarmthScore, got %v", rms[0].WarmthScore) + } + if rms[0].WarmthScore > 1 { + t.Fatalf("expected WarmthScore ≤ 1, got %v", rms[0].WarmthScore) + } +} + +// TestApplyOllamaWarmthPastExpiresAtIsCold verifies that an already-expired +// ExpiresAt is treated as cold (WarmthScore=0), not negative. +func TestApplyOllamaWarmthPastExpiresAtIsCold(t *testing.T) { + rms := []models.ResidentModel{ + {Name: "m1", Runtime: "ollama", Source: "ollama-ps", ExpiresAt: time.Now().Add(-1 * time.Minute)}, + } + info := &models.OllamaInfo{Installed: true, DefaultKeepAlive: "5m"} + facts.ApplyOllamaWarmth(info, rms) + if rms[0].WarmthScore != 0 { + t.Fatalf("expected WarmthScore=0 for past ExpiresAt, got %v", rms[0].WarmthScore) + } +} + +// TestDefaultOllamaKeepAliveFallbacks verifies the helper resolves 5m when +// DefaultKeepAlive is empty, unparseable, or negative. 
+func TestDefaultOllamaKeepAliveFallbacks(t *testing.T) { + cases := []struct { + name string + info *models.OllamaInfo + }{ + {"nil", nil}, + {"empty", &models.OllamaInfo{DefaultKeepAlive: ""}}, + {"garbage", &models.OllamaInfo{DefaultKeepAlive: "not-a-duration"}}, + {"negative", &models.OllamaInfo{DefaultKeepAlive: "-30s"}}, + {"zero", &models.OllamaInfo{DefaultKeepAlive: "0s"}}, + } + for _, c := range cases { + got := facts.DefaultOllamaKeepAlive(c.info) + if got != 5*time.Minute { + t.Errorf("%s: expected 5m fallback, got %v", c.name, got) + } + } +} + +// TestDefaultOllamaKeepAliveParses verifies the helper accepts valid +// duration strings and returns them unchanged. +func TestDefaultOllamaKeepAliveParses(t *testing.T) { + info := &models.OllamaInfo{DefaultKeepAlive: "1h"} + if got := facts.DefaultOllamaKeepAlive(info); got != time.Hour { + t.Fatalf("expected 1h, got %v", got) + } + info.DefaultKeepAlive = "30s" + if got := facts.DefaultOllamaKeepAlive(info); got != 30*time.Second { + t.Fatalf("expected 30s, got %v", got) + } +} From 9ff97d619a08b1d0748f78770d0662f15dac1337 Mon Sep 17 00:00:00 2001 From: AXIS Contributor Date: Tue, 2 Jun 2026 12:40:01 -0400 Subject: [PATCH 2/2] fix(facts): make Ollama discovery script and keep-alive duration parsing robust Improve robustness of the Ollama resident model discovery script by handling non-list JSON formats and string type conversions for VRAM size safely. Handle bare integer keep-alive duration strings in DefaultOllamaKeepAlive by appending seconds ("s") unit prior to duration parsing. Addresses review comments from gemini-code-assist[bot] on PR 151. 
Co-Authored-By: Antigravity --- internal/facts/local.go | 12 ++++++---- internal/facts/tools.go | 40 +++++++++++++++++++------------ internal/placement/warmth_test.go | 24 +++++++++++++------ 3 files changed, 49 insertions(+), 27 deletions(-) diff --git a/internal/facts/local.go b/internal/facts/local.go index 0c9e195..30ad05d 100644 --- a/internal/facts/local.go +++ b/internal/facts/local.go @@ -1049,13 +1049,15 @@ func DefaultOllamaKeepAlive(info *models.OllamaInfo) time.Duration { if info == nil { return fallback } - if info.DefaultKeepAlive == "" { + val := strings.TrimSpace(info.DefaultKeepAlive) + if val == "" { return fallback } - // `time.ParseDuration` accepts "5m", "1h30m", "-30s", etc. It does - // not accept a bare integer (seconds) — Ollama emits e.g. "5m" so - // this is fine. Negative durations are clamped to fallback. - d, err := time.ParseDuration(info.DefaultKeepAlive) + // If it's a bare integer (seconds), append "s" so ParseDuration can parse it. + if _, err := strconv.Atoi(val); err == nil { + val += "s" + } + d, err := time.ParseDuration(val) if err != nil || d <= 0 { return fallback } diff --git a/internal/facts/tools.go b/internal/facts/tools.go index c60803d..3db723b 100644 --- a/internal/facts/tools.go +++ b/internal/facts/tools.go @@ -50,23 +50,33 @@ const OllamaDiscoveryScript = `set -o pipefail; RESIDENT=$(printf '%s' "$PS_JSON" | python3 - 2>/dev/null <<'PYEOF' || echo "" import json, sys try: - entries = json.loads(sys.stdin.read() or "") + data = json.loads(sys.stdin.read() or "[]") + entries = data.get("models", data) if isinstance(data, dict) else data + if not isinstance(entries, list): + entries = [] + out = [] + for e in entries: + if not isinstance(e, dict): + continue + name = e.get("name", "") + if not name: + continue + vram = e.get("size_vram") + try: + vram_val = int(vram) if vram is not None else 0 + except (ValueError, TypeError): + vram_val = 0 + out.append(json.dumps({ + "name": name, + "runtime": "ollama", + 
"processor": e.get("processor", "gpu"), + "size_vram_mb": vram_val // (1024*1024), + "source": "ollama-ps", + "expires_at": e.get("expires_at", ""), + })) + print(",".join(out)) except Exception: sys.exit(0) -out = [] -for e in entries: - name = e.get("name", "") - if not name: - continue - out.append(json.dumps({ - "name": name, - "runtime": "ollama", - "processor": e.get("processor", "gpu"), - "size_vram_mb": int((e.get("size_vram") or 0) // (1024*1024)), - "source": "ollama-ps", - "expires_at": e.get("expires_at", ""), - })) -print(",".join(out)) PYEOF ) else diff --git a/internal/placement/warmth_test.go b/internal/placement/warmth_test.go index 079ccdd..785c2d7 100644 --- a/internal/placement/warmth_test.go +++ b/internal/placement/warmth_test.go @@ -218,14 +218,24 @@ func TestDefaultOllamaKeepAliveFallbacks(t *testing.T) { } // TestDefaultOllamaKeepAliveParses verifies the helper accepts valid -// duration strings and returns them unchanged. +// duration strings and bare integers representing seconds, and returns +// them parsed correctly. func TestDefaultOllamaKeepAliveParses(t *testing.T) { - info := &models.OllamaInfo{DefaultKeepAlive: "1h"} - if got := facts.DefaultOllamaKeepAlive(info); got != time.Hour { - t.Fatalf("expected 1h, got %v", got) + cases := []struct { + input string + expected time.Duration + }{ + {"1h", time.Hour}, + {"30s", 30 * time.Second}, + {"300", 5 * time.Minute}, + {"1200", 20 * time.Minute}, + {"30", 30 * time.Second}, + {" 600 ", 10 * time.Minute}, } - info.DefaultKeepAlive = "30s" - if got := facts.DefaultOllamaKeepAlive(info); got != 30*time.Second { - t.Fatalf("expected 30s, got %v", got) + for _, c := range cases { + info := &models.OllamaInfo{DefaultKeepAlive: c.input} + if got := facts.DefaultOllamaKeepAlive(info); got != c.expected { + t.Errorf("input %q: expected %v, got %v", c.input, c.expected, got) + } } }