Skip to content

Commit 6292cc7

Browse files
authored
ci: mtu check for cilium e2e (#3624)
* init commit: create script for mtu check
* ci: cilium e2e run mtu check
* ci: scale pod deployment in release tests
* ci: rollout status
* test: template call
* addressing comments, fix script scale deployment
* address nits, test maxSkew, add exit on errors
* adding missing template calls
* fix cilium-overlay e2e
1 parent 1897a16 commit 6292cc7

11 files changed

+156
-0
lines changed

.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ stages:
266266
fi
267267
name: "testAsyncDelete"
268268
displayName: "Verify Async Delete when CNS is down"
269+
- template: ../../templates/cilium-mtu-check.yaml
269270
- template: ../k8s-e2e/k8s-e2e-job-template.yaml
270271
parameters:
271272
sub: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)

.pipelines/singletenancy/cilium-dualstack-overlay/cilium-dualstackoverlay-e2e-step-template.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,5 @@ steps:
158158
fi
159159
name: "testAsyncDelete"
160160
displayName: "Verify Async Delete when CNS is down"
161+
162+
- template: ../../templates/cilium-mtu-check.yaml

.pipelines/singletenancy/cilium-dualstack-overlay/cilium-dualstackoverlay-e2e.steps.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,5 @@ steps:
170170
fi
171171
name: "testAsyncDelete"
172172
displayName: "Verify Async Delete when CNS is down"
173+
174+
- template: ../../templates/cilium-mtu-check.yaml

.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-step-template.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,8 @@ steps:
196196
name: "testAsyncDelete"
197197
displayName: "Verify Async Delete when CNS is down"
198198
199+
- template: ../../templates/cilium-mtu-check.yaml
200+
199201
- script: |
200202
ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/test-output/
201203
echo $ARTIFACT_DIR

.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e.steps.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,8 @@ steps:
198198
fi
199199
name: "testAsyncDelete"
200200
displayName: "Verify Async Delete when CNS is down"
201+
202+
- template: ../../templates/cilium-mtu-check.yaml
201203

202204
- script: |
203205
ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/test-output/

.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,3 +237,5 @@ steps:
237237
fi
238238
name: "testAsyncDelete"
239239
displayName: "Verify Async Delete when CNS is down"
240+
241+
- template: ../../templates/cilium-mtu-check.yaml

.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e.steps.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,3 +248,7 @@ steps:
248248
fi
249249
name: "testAsyncDelete"
250250
displayName: "Verify Async Delete when CNS is down"
251+
252+
- template: ../../templates/cilium-mtu-check.yaml
253+
254+
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
# Reusable pipeline step: runs hack/scripts/cilium-mtu-validation.sh.
# The script is started from inside hack/scripts because it locates the nginx
# manifest through a path relative to that directory.
steps:
  - displayName: "Run Cilium MTU Validation"
    name: "CiliumMTUValidation"
    script: |
      cd hack/scripts
      chmod +x cilium-mtu-validation.sh
      ./cilium-mtu-validation.sh

.pipelines/templates/cilium-tests.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,5 @@ steps:
8181
fi
8282
name: "testAsyncDelete"
8383
displayName: "Verify Async Delete when CNS is down"
84+
85+
- template: ./cilium-mtu-check.yaml

hack/manifests/nginx.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
# nginx Deployment used as the test workload by
# hack/scripts/cilium-mtu-validation.sh, which applies this manifest, scales it
# on multi-node clusters and reads the eth0 MTU from its pods.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx
  namespace: kube-system
  labels:
    app: nginx
spec:
  replicas: 4
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      # Spread replicas across nodes: the topology domain is the node hostname,
      # so each distinct node name is its own domain. ScheduleAnyway keeps the
      # constraint soft; pods are still scheduled when the skew cannot be met.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: nginx
      containers:
        - name: nginx
          image: mcr.microsoft.com/azurelinux/base/nginx:1
          ports:
            - containerPort: 80

hack/scripts/cilium-mtu-validation.sh

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
#!/bin/bash
#
# Cilium MTU validation.
#
# Deploys the nginx test deployment (hack/manifests/nginx.yaml) and then, for
# every node in the cluster, compares the eth0 MTU reported by:
#   * the Cilium agent pod running on that node,
#   * an nginx pod running on that node,
#   * the node itself (read through a `kubectl debug node/...` pod).
#
# Exits 1, after printing a `##[error]` log line, as soon as a value cannot be
# read or the three values differ on a node. The nginx deployment and any
# node-debugger pods are removed on every exit path.
#
# Requires: kubectl configured against the target cluster.

set -uo pipefail

readonly NAMESPACE="kube-system"
readonly ROLLOUT_TIMEOUT="60s"

# Resolve the manifest relative to this script so the result does not depend
# on the caller's working directory.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" && pwd)
readonly SCRIPT_DIR
readonly NGINX_MANIFEST="${SCRIPT_DIR}/../manifests/nginx.yaml"

#######################################
# Print a failure message (plain and as a `##[error]` line) and exit 1.
# Must not be called from inside a command substitution: `exit` would only
# terminate the subshell.
# Arguments: $1 - message
#######################################
fail() {
  echo "$1"
  echo "##[error]$1"
  exit 1
}

#######################################
# Best-effort removal of everything this script created. Installed as the EXIT
# trap so it also runs when validation aborts early; the original exit status
# is preserved.
#######################################
cleanup() {
  local status=$?
  trap - EXIT

  if kubectl delete deployment nginx -n "$NAMESPACE" --ignore-not-found; then
    echo "Cleaned up nginx deployment"
  else
    echo "Failed to clean up nginx deployment"
  fi

  # `kubectl debug node/...` pods are created in the current context namespace
  # and are named node-debugger-<node>-<suffix>.
  local -a debug_pods=()
  local pod
  while IFS= read -r pod; do
    debug_pods+=("$pod")
  done < <(kubectl get pods -o name | grep "node-debugger")

  if (( ${#debug_pods[@]} > 0 )); then
    # `kubectl delete` waits for the objects to be gone; bound that wait.
    if ! kubectl delete "${debug_pods[@]}" --timeout=60s; then
      echo "Failed to clean up debug pod ${debug_pods[*]}"
    fi
  else
    echo "No debug pod found"
  fi

  exit "$status"
}

#######################################
# Block until every replica of the nginx deployment is rolled out, or fail.
# `rollout status` also waits for the controller to observe the latest spec,
# so it cannot return early on a stale "Available" condition right after a
# scale operation.
#######################################
wait_for_nginx() {
  kubectl rollout status deployment/nginx -n "$NAMESPACE" \
    --timeout="$ROLLOUT_TIMEOUT" \
    || fail "nginx deployment was not ready within $ROLLOUT_TIMEOUT"
}

#######################################
# Print the name of the first pod on a node that matches a label selector.
# Prints an empty line when there is no such pod.
# Arguments: $1 - node name, $2 - label selector
#######################################
first_pod_on_node() {
  local node=$1 selector=$2 pods
  pods=$(kubectl get pods -n "$NAMESPACE" \
    --field-selector "spec.nodeName=${node}" -l "$selector" \
    -o jsonpath='{.items[*].metadata.name}') || return
  printf '%s\n' "${pods%% *}"
}

#######################################
# Print the eth0 MTU as seen from inside a pod.
# Arguments: $1 - pod name (in $NAMESPACE)
#######################################
pod_eth0_mtu() {
  kubectl exec -n "$NAMESPACE" "$1" -- cat /sys/class/net/eth0/mtu \
    | tr -d '\r'
}

#######################################
# Print the eth0 MTU of a node, read through a node debug pod.
# Arguments: $1 - node name
#######################################
node_eth0_mtu() {
  # stderr carries kubectl's TTY/attach notices, which are not useful here.
  # With a TTY attached the output uses CRLF line endings, so strip the CR
  # before taking the last line (earlier lines are kubectl status messages).
  kubectl debug "node/$1" -it --image=busybox -- \
    sh -c "cat /sys/class/net/eth0/mtu" 2>/dev/null \
    | tr -d '\r' \
    | tail -n 1
}

#######################################
# Succeed when the argument looks like an MTU value (non-empty, digits only).
# Arguments: $1 - candidate value
#######################################
is_mtu() {
  [[ "$1" =~ ^[0-9]+$ ]]
}

trap cleanup EXIT

echo "Deploy nginx pods for MTU testing"
kubectl apply -f "$NGINX_MANIFEST" \
  || fail "Failed to deploy nginx from $NGINX_MANIFEST"
wait_for_nginx

# Get all nodes
node_names=$(kubectl get nodes -o jsonpath='{.items[*].metadata.name}') \
  || fail "Failed to list nodes"
nodes=()
read -r -a nodes <<< "$node_names"
node_count=${#nodes[@]}

# Without this guard an empty node list would skip the loop below and the
# script would report success without validating anything.
if (( node_count == 0 )); then
  fail "No nodes found, nothing to validate"
fi

# in CNI release test scenario scale deployments to 3 * node count to get replicas on each node
if (( node_count > 1 )); then
  replicas=$(( 3 * node_count ))
  echo "Scaling nginx deployment to $replicas replicas"
  kubectl scale deployment nginx --replicas="$replicas" -n "$NAMESPACE" \
    || fail "Failed to scale nginx deployment to $replicas replicas"
fi
# Wait for nginx pods to be ready
wait_for_nginx

echo "Checking MTU for pods in namespace: $NAMESPACE using Cilium agent and nginx MTU"

for node in "${nodes[@]}"; do
  echo "Checking node: $node"

  # Get the Cilium agent pod running on this node
  cilium_pod=$(first_pod_on_node "$node" "k8s-app=cilium")
  [[ -n "$cilium_pod" ]] \
    || fail "Failed to find Cilium agent pod on node $node"

  # Get the MTU of eth0 in the Cilium agent pod
  cilium_mtu=$(pod_eth0_mtu "$cilium_pod")
  is_mtu "$cilium_mtu" \
    || fail "Failed to get MTU from Cilium agent pod on node $node"
  echo "Cilium agent eth0 MTU: $cilium_mtu"

  # Get an nginx pod running on this node
  nginx_pod=$(first_pod_on_node "$node" "app=nginx")
  [[ -n "$nginx_pod" ]] \
    || fail "Failed to find nginx pod on node $node"

  # Get the MTU of eth0 in the nginx pod
  nginx_mtu=$(pod_eth0_mtu "$nginx_pod")
  is_mtu "$nginx_mtu" \
    || fail "Failed to get MTU from nginx pod on node $node"
  echo "Nginx pod eth0 MTU: $nginx_mtu"

  # Get the node's eth0 MTU
  node_mtu=$(node_eth0_mtu "$node")
  is_mtu "$node_mtu" \
    || fail "Failed to get MTU from node $node"
  echo "Node eth0 MTU: $node_mtu"

  # Check if the MTUs match (all three were validated as plain digit strings)
  if [[ "$cilium_mtu" == "$nginx_mtu" && "$nginx_mtu" == "$node_mtu" ]]; then
    echo "MTU validation passed for node $node"
  else
    echo "MTU validation failed for node $node"
    echo "Cilium agent MTU: $cilium_mtu, Nginx pod MTU: $nginx_mtu, Node MTU: $node_mtu"
    echo "##[error]MTU validation failed. MTUs do not match."
    exit 1
  fi

  echo "----------------------------------------"
done

# Clean up happens in the EXIT trap (see cleanup).

0 commit comments

Comments
 (0)