-
Notifications
You must be signed in to change notification settings - Fork 1
feat: K8s Event + metric coverage for CocoonSet + CocoonHibernation paths #7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 11 commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
abf35f3
feat(observability): metrics package + EventRecorder injection
CMGS 2717235
feat(cocoonset): consume lifecycle-state=Failed annotation
CMGS c8f04a8
feat(hibernation): hibernate deadline + duration metrics + retry events
CMGS cad1a69
feat(cocoonset): sub-agent rebuild backoff + dead letter + recovery e…
CMGS 38af223
docs(readme): reflect hibernate timeout + observability surface
CMGS 055f4a9
test(rebuild): cover history round-trip, GC, backoff schedule
CMGS 8fc1a7e
fix(observability): address copilot review findings
CMGS 8c1aaa4
fix(observability): address copilot round-2 findings
CMGS 7ead1c0
fix(hibernation): observe phase duration only on actual transition
CMGS ee746e2
fix(hibernation): dedup phase-exit by UID+phase in-memory
CMGS 8b44303
fix(cocoonset): sub-agents + toolboxes also consume lifecycle=Failed
CMGS 9ff4c20
fix(observability): dedup-by-LTT + same-reconcile rebuild history race
CMGS e10d00c
chore: trim WHAT-comments, extract shared helpers, refresh README
CMGS 0aaf164
refactor(rebuild): use maps.DeleteFunc for stale-slot GC
CMGS b3112ab
fix(rebuild): address copilot round-3 findings
CMGS 3ae4092
chore(rebuild): inline the nil-map hint
CMGS File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| package cocoonset | ||
|
|
||
| import ( | ||
| "encoding/json" | ||
| "time" | ||
|
|
||
| cocoonv1 "github.com/cocoonstack/cocoon-common/apis/v1" | ||
| ) | ||
|
|
||
// Annotation keys in the cocoonset.cocoonstack.io namespace.
const (
	// annotationRebuildHistory is the CocoonSet annotation holding the
	// JSON-encoded, slot-keyed rebuild history.
	annotationRebuildHistory = "cocoonset.cocoonstack.io/rebuild-history"
	// annotationDeadLetter is not referenced in this file; presumably it
	// marks slots that ran out of rebuild attempts (confirm at call sites).
	annotationDeadLetter = "cocoonset.cocoonstack.io/dead-letter"
)

// maxRebuildAttempts is not referenced in this file; presumably it caps how
// many rebuilds a single slot may receive (confirm at call sites).
const maxRebuildAttempts = 4
|
|
||
// rebuildEntry is the per-slot record of rebuilds performed by
// triageSubAgent. Entries are stored on the CocoonSet itself, as a JSON map
// keyed by slot inside an annotation, because deleting the pod wipes any
// annotation kept on the pod.
type rebuildEntry struct {
	// Count is the number of rebuilds recorded for the slot.
	Count int `json:"count"`
	// LastDeleted is a timestamp set by callers outside this file;
	// presumably the time of the most recent rebuild-triggered pod delete
	// (confirm at call sites).
	LastDeleted time.Time `json:"lastDeleted"`
}
|
|
||
| func readRebuildHistory(cs *cocoonv1.CocoonSet) map[int32]rebuildEntry { | ||
| raw := cs.Annotations[annotationRebuildHistory] | ||
| if raw == "" { | ||
| return map[int32]rebuildEntry{} | ||
| } | ||
| m := map[int32]rebuildEntry{} | ||
| if err := json.Unmarshal([]byte(raw), &m); err != nil { | ||
| return map[int32]rebuildEntry{} | ||
| } | ||
|
CMGS marked this conversation as resolved.
|
||
| return m | ||
| } | ||
|
|
||
| // encodeRebuildHistory garbage-collects entries for slots no longer in the | ||
| // spec and returns the JSON payload for the annotation. | ||
| func encodeRebuildHistory(replicas int32, m map[int32]rebuildEntry) (string, error) { | ||
| for slot := range m { | ||
| if slot > replicas { | ||
| delete(m, slot) | ||
| } | ||
| } | ||
| raw, err := json.Marshal(m) | ||
| if err != nil { | ||
| return "", err | ||
| } | ||
| return string(raw), nil | ||
| } | ||
|
|
||
// backoffDelay maps the number of rebuilds already recorded to the pause
// before the next one: nothing for the first attempt, then 1s, then 5s, and
// 30s for every other value of priorCount.
func backoffDelay(priorCount int) time.Duration {
	steps := [...]time.Duration{0, time.Second, 5 * time.Second}
	if priorCount >= 0 && priorCount < len(steps) {
		return steps[priorCount]
	}
	// Anything outside the table, including negative counts, gets the
	// longest delay.
	return 30 * time.Second
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| package cocoonset | ||
|
|
||
| import ( | ||
| "testing" | ||
| "time" | ||
|
|
||
| cocoonv1 "github.com/cocoonstack/cocoon-common/apis/v1" | ||
| ) | ||
|
|
||
| func TestRebuildHistoryRoundTrip(t *testing.T) { | ||
| cs := &cocoonv1.CocoonSet{} | ||
| cs.Spec.Agent.Replicas = 3 | ||
| in := map[int32]rebuildEntry{ | ||
| 1: {Count: 2, LastDeleted: time.Date(2026, 5, 14, 1, 0, 0, 0, time.UTC)}, | ||
| 2: {Count: 1, LastDeleted: time.Date(2026, 5, 14, 1, 0, 30, 0, time.UTC)}, | ||
| } | ||
| enc, err := encodeRebuildHistory(cs.Spec.Agent.Replicas, in) | ||
| if err != nil { | ||
| t.Fatalf("encodeRebuildHistory: %v", err) | ||
| } | ||
| cs.Annotations = map[string]string{annotationRebuildHistory: enc} | ||
| got := readRebuildHistory(cs) | ||
| if got[1].Count != 2 || got[2].Count != 1 { | ||
| t.Fatalf("round-trip lost counts: %+v", got) | ||
| } | ||
| } | ||
|
|
||
| func TestRebuildHistoryGarbageCollectsStaleSlots(t *testing.T) { | ||
| in := map[int32]rebuildEntry{ | ||
| 1: {Count: 1}, | ||
| 2: {Count: 2}, | ||
| 7: {Count: 3}, // slot beyond Replicas, must be pruned | ||
| } | ||
| enc, err := encodeRebuildHistory(2, in) | ||
| if err != nil { | ||
| t.Fatalf("encodeRebuildHistory: %v", err) | ||
| } | ||
| cs := &cocoonv1.CocoonSet{} | ||
| cs.Annotations = map[string]string{annotationRebuildHistory: enc} | ||
| got := readRebuildHistory(cs) | ||
| if _, ok := got[7]; ok { | ||
| t.Fatalf("expected slot 7 pruned, got %+v", got) | ||
| } | ||
| if len(got) != 2 { | ||
| t.Fatalf("expected 2 surviving slots, got %d: %+v", len(got), got) | ||
| } | ||
| } | ||
|
|
||
| func TestBackoffDelaySchedule(t *testing.T) { | ||
| cases := []struct { | ||
| count int | ||
| want time.Duration | ||
| }{ | ||
| {0, 0}, | ||
| {1, 1 * time.Second}, | ||
| {2, 5 * time.Second}, | ||
| {3, 30 * time.Second}, | ||
| {10, 30 * time.Second}, | ||
| } | ||
| for _, tc := range cases { | ||
| if got := backoffDelay(tc.count); got != tc.want { | ||
| t.Errorf("backoffDelay(%d) = %s, want %s", tc.count, got, tc.want) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| func TestReadRebuildHistoryHandlesCorruptAnnotation(t *testing.T) { | ||
| cs := &cocoonv1.CocoonSet{} | ||
| cs.Annotations = map[string]string{annotationRebuildHistory: "not-json"} | ||
| if got := readRebuildHistory(cs); len(got) != 0 { | ||
| t.Fatalf("corrupt annotation must yield empty history, got %+v", got) | ||
| } | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.