Skip to content

Commit 1152614

Browse files
pront and claude
authored
chore(ci): collect K8s diagnostics on E2E test failure (#25114)
Add a diagnostic step to the K8s E2E workflow that runs on failure. Captures pod logs, events, configs, and node resource usage to avoid deep manual investigation when tests fail. Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2d6fea2 commit 1152614

File tree

1 file changed

+32
-1
lines changed

1 file changed

+32
-1
lines changed

.github/workflows/k8s_e2e.yml

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ jobs:
164164
test-e2e-kubernetes:
165165
name: K8s ${{ matrix.kubernetes_version.version }} / ${{ matrix.container_runtime }} (${{ matrix.kubernetes_version.role }})
166166
runs-on: ubuntu-24.04
167-
timeout-minutes: 45
167+
timeout-minutes: 60
168168
needs:
169169
- build-x86_64-unknown-linux-gnu
170170
- compute-k8s-test-plan
@@ -217,6 +217,37 @@ jobs:
217217
max_attempts: 3
218218
command: make test-e2e-kubernetes
219219

220+
- name: Collect K8s diagnostics on failure
221+
if: ${{ !success() }}
222+
run: |
223+
set +e +o pipefail
224+
# Best-effort diagnostics -- never fail the job
225+
# run_diag LABEL CMD [ARGS...]
# Prints a "--- LABEL ---" header, runs CMD with its stderr merged into
# stdout, then prints a blank separator line. The command's exit status is
# discarded so a failing diagnostic command does not fail the caller.
run_diag() {
  local label="$1"
  shift
  printf '%s\n' "--- $label ---"
  "$@" 2>&1 || true
  printf '\n'
}
226+
# For commands with pipes that can't be passed as args
227+
# run_diag_sh LABEL SCRIPT
# Prints a "--- LABEL ---" header, executes SCRIPT (a single string) via
# `bash -c` with stderr merged into stdout, then prints a blank separator
# line. The script's exit status is discarded.
run_diag_sh() {
  printf '%s\n' "--- $1 ---"
  bash -c "$2" 2>&1 || true
  printf '\n'
}
228+
229+
# Cluster-wide overview: all pods, all events (sorted by creation timestamp), and nodes.
run_diag "Cluster-wide pods" kubectl get pods -A -o wide
230+
run_diag "Cluster-wide events" kubectl get events -A --sort-by=.metadata.creationTimestamp
231+
run_diag "Nodes" kubectl get nodes -o wide
232+
233+
# Visit every namespace whose name starts with "vector-". kubectl's stderr is
# discarded and `|| true` keeps the substitution from failing when grep matches
# nothing, so a failed or empty lookup simply yields zero iterations.
for ns in $(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}' 2>/dev/null | tr ' ' '\n' | grep -E '^vector-' || true); do
234+
echo "=========================================="
235+
echo "=== Namespace: $ns ==="
236+
echo "=========================================="
237+
run_diag "Pods" kubectl get pods -n "$ns" -o wide
238+
run_diag "Pod descriptions" kubectl describe pods -n "$ns"
239+
run_diag "Events" kubectl get events -n "$ns" --sort-by=.metadata.creationTimestamp
240+
run_diag "ConfigMaps" kubectl get configmaps -n "$ns" -o yaml
241+
242+
# Per-pod logs across all containers: the last 100 lines, then the last
# 50 lines requested with --previous.
for pod in $(kubectl get pods -n "$ns" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null || true); do
243+
run_diag "Logs: $pod" kubectl logs -n "$ns" "$pod" --all-containers=true --tail=100
244+
run_diag "Previous logs: $pod" kubectl logs -n "$ns" "$pod" --all-containers=true --previous --tail=50
245+
done
246+
done
247+
248+
# This command contains a pipe, so it is passed as a single string to run_diag_sh.
run_diag_sh "Node resources" "kubectl describe nodes | grep -A20 'Allocated resources'"
249+
run_diag "Minikube logs" minikube logs --length=100
250+
220251
final-result:
221252
name: K8s E2E Suite
222253
runs-on: ubuntu-24.04

0 commit comments

Comments
 (0)