Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions .github/actions/agent-e2e-kind-control-plane/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Composite action: prepares a GitHub-hosted runner for the agent e2e tests.
# Steps, in order: make /dev/kvm accessible, install Go and the QEMU tooling,
# create a Kind cluster, open the host firewall for the VM bridge, point
# kindnet at the control-plane container IP, create the QEMU VM, and finally
# plug the Kind control-plane container into the VM bridge.
name: Setup agent e2e Kind control plane
description: Set up KVM, dependencies, a Kind control plane, and VM bridge networking for agent e2e tests.
inputs:
  cluster-name:
    description: Kind cluster name.
    required: true
  vm-subnet:
    description: 'First three octets of the VM /24 subnet, for example 192.168.110. The Kind control-plane container is assigned <prefix>.2 on the VM bridge.'
    required: true
runs:
  using: composite
  steps:
    # Install a udev rule that sets mode 0666 on the kvm device node, then
    # reload and re-trigger udev so the rule applies to the existing node.
    # Presumably this lets the unprivileged runner user start QEMU with KVM.
    - name: Enable KVM
      shell: bash
      run: |
        echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
        sudo udevadm control --reload-rules
        sudo udevadm trigger --name-match=kvm

    - name: Set up Go
      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
      with:
        go-version-file: go.mod

    - name: Install system dependencies
      shell: bash
      run: |
        sudo apt-get update
        sudo apt-get install -y --no-install-recommends \
          qemu-system-x86 qemu-utils genisoimage \
          iptables

    - name: Create Kind cluster
      uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc # v1.14.0
      with:
        cluster_name: ${{ inputs['cluster-name'] }}
        version: v0.29.0

    - name: Configure Kind cluster networking for VM
      shell: bash
      env:
        # Inputs are passed through the environment rather than interpolated
        # into the script text.
        KIND_CLUSTER_NAME: ${{ inputs['cluster-name'] }}
      run: |
        set -euo pipefail
        # `required: true` is not enforced by the runner, so fail early with a
        # clear message if the input was omitted.
        : "${KIND_CLUSTER_NAME:?cluster-name input must not be empty}"

        KIND_CONTAINER="${KIND_CLUSTER_NAME}-control-plane"
        KIND_IP=$(docker inspect "${KIND_CONTAINER}" \
          --format '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}')
        # The template concatenates the addresses of every attached network
        # without a separator, so require exactly one well-formed IPv4 address
        # (this also rejects the empty string).
        IPV4_RE='^[0-9]+(\.[0-9]+){3}$'
        if [[ ! "${KIND_IP}" =~ ${IPV4_RE} ]]; then
          echo "::error::Could not determine Kind control-plane container IP (got '${KIND_IP}')"
          exit 1
        fi
        echo "[INFO] Kind control-plane IP: ${KIND_IP}"
        # NOTE(review): the bridge is presumably created later by the
        # "Create QEMU VM" step; the rules below only reference it by name.
        BRIDGE="virbr-e2e"

        # Allow forwarding between the VM bridge and Docker bridge.
        sudo iptables -I FORWARD -i "${BRIDGE}" -j ACCEPT
        sudo iptables -I FORWARD -o "${BRIDGE}" -j ACCEPT

        # Docker may insert raw PREROUTING DROP rules that block non-Docker
        # traffic to container IPs. Insert an ACCEPT so the VM can reach the
        # Kind API server.
        sudo iptables -t raw -I PREROUTING -i "${BRIDGE}" -j ACCEPT

        # Patch kindnet so CONTROL_PLANE_ENDPOINT uses the container IP instead
        # of the hostname (which is unresolvable from the VM).
        echo "[INFO] Patching kindnet DaemonSet for VM-reachable control plane endpoint..."
        PATCH=$(cat <<EOF
        {
          "spec": {
            "template": {
              "spec": {
                "containers": [{
                  "name": "kindnet-cni",
                  "env": [
                    {"name": "CONTROL_PLANE_ENDPOINT", "value": "${KIND_IP}:6443"}
                  ]
                }]
              }
            }
          }
        }
        EOF
        )
        kubectl -n kube-system patch daemonset kindnet \
          --type=strategic -p "${PATCH}"

        echo "[INFO] Waiting for kindnet rollout..."
        kubectl -n kube-system rollout status daemonset/kindnet --timeout=60s

    - name: Create QEMU VM
      shell: bash
      run: python3 ./hack/agent/e2e-kind/e2e.py --verbose create-vm

    - name: Attach Kind container to VM bridge
      shell: bash
      env:
        KIND_CLUSTER_NAME: ${{ inputs['cluster-name'] }}
        VM_SUBNET: ${{ inputs['vm-subnet'] }}
      run: |
        set -euo pipefail
        # `required: true` is not enforced by the runner; an empty prefix would
        # otherwise produce the nonsensical address ".2/24" below.
        : "${KIND_CLUSTER_NAME:?cluster-name input must not be empty}"
        : "${VM_SUBNET:?vm-subnet input must not be empty}"

        KIND_CONTAINER="${KIND_CLUSTER_NAME}-control-plane"
        BRIDGE="virbr-e2e"

        # Connect the Kind container directly to the VM bridge via a veth
        # pair so that the VM subnet is directly reachable at L2. This is
        # required because kindnet adds routes of the form
        # "10.244.x.0/24 via <nodeIP>" and the kernel rejects these when
        # the gateway is only reachable via an indirect route.
        echo "[INFO] Attaching Kind container to ${BRIDGE} bridge..."
        KIND_PID=$(docker inspect "${KIND_CONTAINER}" --format '{{.State.Pid}}')
        # Docker reports Pid 0 for a container that is not running; there is
        # no network namespace to enter in that case.
        if [[ -z "${KIND_PID}" || "${KIND_PID}" == "0" ]]; then
          echo "::error::Kind control-plane container ${KIND_CONTAINER} is not running"
          exit 1
        fi
        # Remove a leftover veth from a previous attempt; absence is not an error.
        sudo ip link delete veth-kind-e2e 2>/dev/null || true
        sudo ip link add veth-kind-e2e type veth peer name eth-e2e
        # Host end joins the bridge; peer end moves into the container netns.
        sudo ip link set veth-kind-e2e master "${BRIDGE}"
        sudo ip link set veth-kind-e2e up
        sudo ip link set eth-e2e netns "${KIND_PID}"
        sudo nsenter -t "${KIND_PID}" -n ip addr add "${VM_SUBNET}.2/24" dev eth-e2e
        sudo nsenter -t "${KIND_PID}" -n ip link set eth-e2e up
214 changes: 120 additions & 94 deletions .github/workflows/agent-e2e-kind.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,101 +60,11 @@ jobs:
# Check out the repository; the action is pinned to a full commit SHA and the
# trailing comment records the corresponding release tag.
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Enable KVM
run: |
echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
sudo udevadm control --reload-rules
sudo udevadm trigger --name-match=kvm

- name: Set up Go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
go-version-file: go.mod

- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
qemu-system-x86 qemu-utils genisoimage \
iptables

- name: Create Kind cluster
uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc # v1.14.0
- name: Set up test control plane
uses: ./.github/actions/agent-e2e-kind-control-plane
with:
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
version: v0.29.0

- name: Configure Kind cluster networking for VM
run: |
set -euo pipefail
KIND_CONTAINER="${KIND_CLUSTER_NAME}-control-plane"
KIND_IP=$(docker inspect "${KIND_CONTAINER}" \
--format '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}')
if [[ -z "${KIND_IP}" ]]; then
echo "::error::Could not determine Kind control-plane container IP"
exit 1
fi
echo "[INFO] Kind control-plane IP: ${KIND_IP}"
BRIDGE="virbr-e2e"

# Allow forwarding between the VM bridge and Docker bridge.
sudo iptables -I FORWARD -i "${BRIDGE}" -j ACCEPT
sudo iptables -I FORWARD -o "${BRIDGE}" -j ACCEPT

# Docker may insert raw PREROUTING DROP rules that block non-Docker
# traffic to container IPs. Insert an ACCEPT so the VM can reach the
# Kind API server.
sudo iptables -t raw -I PREROUTING -i "${BRIDGE}" -j ACCEPT

# Patch kindnet so CONTROL_PLANE_ENDPOINT uses the container IP instead
# of the hostname (which is unresolvable from the VM).
echo "[INFO] Patching kindnet DaemonSet for VM-reachable control plane endpoint..."
PATCH=$(cat <<EOF
{
"spec": {
"template": {
"spec": {
"containers": [{
"name": "kindnet-cni",
"env": [
{"name": "CONTROL_PLANE_ENDPOINT", "value": "${KIND_IP}:6443"}
]
}]
}
}
}
}
EOF
)
kubectl -n kube-system patch daemonset kindnet \
--type=strategic -p "${PATCH}"

echo "[INFO] Waiting for kindnet rollout..."
kubectl -n kube-system rollout status daemonset/kindnet --timeout=60s

- name: Create QEMU VM
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose create-vm

- name: Attach Kind container to VM bridge
run: |
set -euo pipefail
KIND_CONTAINER="${KIND_CLUSTER_NAME}-control-plane"
BRIDGE="virbr-e2e"

# Connect the Kind container directly to the VM bridge via a veth
# pair so that the VM subnet is directly reachable at L2. This is
# required because kindnet adds routes of the form
# "10.244.x.0/24 via <nodeIP>" and the kernel rejects these when
# the gateway is only reachable via an indirect route.
echo "[INFO] Attaching Kind container to ${BRIDGE} bridge..."
KIND_PID=$(docker inspect "${KIND_CONTAINER}" --format '{{.State.Pid}}')
sudo ip link delete veth-kind-e2e 2>/dev/null || true
sudo ip link add veth-kind-e2e type veth peer name eth-e2e
sudo ip link set veth-kind-e2e master "${BRIDGE}"
sudo ip link set veth-kind-e2e up
sudo ip link set eth-e2e netns "${KIND_PID}"
sudo nsenter -t "${KIND_PID}" -n ip addr add "${VM_SUBNET}.2/24" dev eth-e2e
sudo nsenter -t "${KIND_PID}" -n ip link set eth-e2e up
cluster-name: ${{ env.KIND_CLUSTER_NAME }}
vm-subnet: ${{ env.VM_SUBNET }}

# Invoke the e2e helper script's install-machine-crd subcommand with verbose output.
- name: Install Machine CRD
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose install-machine-crd
Expand Down Expand Up @@ -302,3 +212,119 @@ jobs:
# Invoke the e2e helper script's cleanup subcommand; `if: always()` makes the
# step run even when an earlier step in the job failed.
- name: Cleanup
if: always()
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose cleanup

# Runs every node-config scenario found under hack/agent/e2e-kind/node-configs
# against a single Kind control plane and QEMU VM, then collects logs and
# cleans up regardless of the outcome.
agent-config-e2e:
  name: agent config e2e
  runs-on: ubuntu-24.04
  timeout-minutes: 60
  env:
    KIND_CLUSTER_NAME: agent-config-e2e
    VM_NAME: agent-config-e2e
    # First three octets of the VM /24 subnet; VM_IP lies inside it.
    VM_SUBNET: "192.168.110"
    VM_IP: "192.168.110.10"
    AGENT_MACHINE_NAME: agent-config-e2e
  steps:
    - name: Checkout
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

    - name: Set up test control plane
      uses: ./.github/actions/agent-e2e-kind-control-plane
      with:
        cluster-name: ${{ env.KIND_CLUSTER_NAME }}
        vm-subnet: ${{ env.VM_SUBNET }}

    - name: Install Machine CRD
      run: python3 ./hack/agent/e2e-kind/e2e.py --verbose install-machine-crd

    - name: Start machina controller
      run: python3 ./hack/agent/e2e-kind/e2e.py --verbose start-machina-controller

    - name: Validate machina controller
      run: python3 ./hack/agent/e2e-kind/e2e.py --verbose validate-machina-controller

    - name: Discover and validate node configs
      run: |
        set -euo pipefail
        # Every top-level *.json file is one scenario; sorting makes the run
        # order deterministic.
        mapfile -t NODE_CONFIGS < <(find hack/agent/e2e-kind/node-configs \
          -maxdepth 1 -type f -name '*.json' | sort)
        if [[ "${#NODE_CONFIGS[@]}" -eq 0 ]]; then
          echo "::error::No node config scenarios found"
          exit 1
        fi

        last_index=$((${#NODE_CONFIGS[@]} - 1))
        for index in "${!NODE_CONFIGS[@]}"; do
          node_config="${NODE_CONFIGS[$index]}"
          scenario="$(basename "${node_config}" .json)"
          echo "::group::agent config e2e: ${scenario}"
          python3 ./hack/agent/e2e-kind/e2e.py --verbose --node-config "${node_config}" run-agent
          python3 ./hack/agent/e2e-kind/e2e.py --verbose wait-for-node
          python3 ./hack/agent/e2e-kind/e2e.py --verbose --node-config "${node_config}" validate-node-config
          python3 ./hack/agent/e2e-kind/e2e.py --verbose dump-persisted-agent-config
          python3 ./hack/agent/e2e-kind/e2e.py --verbose validate-kube-proxy
          python3 ./hack/agent/e2e-kind/e2e.py --verbose --node-config "${node_config}" validate-machine-cr-created
          python3 ./hack/agent/e2e-kind/e2e.py --verbose validate-workload
          python3 ./hack/agent/e2e-kind/e2e.py --verbose --node-config "${node_config}" validate-node-repave-upgrade

          # Reset between scenarios only; the state left by the last scenario
          # stays in place for the log-collection steps below.
          if [[ "${index}" -lt "${last_index}" ]]; then
            python3 ./hack/agent/e2e-kind/e2e.py --verbose reset-agent
            python3 ./hack/agent/e2e-kind/e2e.py --verbose delete-machine-cr
          fi
          echo "::endgroup::"
        done

    # Best-effort: every command tolerates failure so that a dead or
    # unreachable VM never masks the original test failure.
    - name: Collect VM logs
      if: always()
      run: |
        mkdir -p logs
        VM_DIR=".vm-e2e"
        cp "${VM_DIR}/${VM_NAME}.log" logs/vm-serial.log 2>/dev/null || true
        # An array keeps each ssh argument intact without relying on word
        # splitting of an unquoted string.
        SSH=(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 -i "${VM_DIR}/ssh/id_ed25519" "ubuntu@${VM_IP}")
        "${SSH[@]}" "sudo journalctl --no-pager -l" > logs/vm-journal.log 2>/dev/null || true
        "${SSH[@]}" "sudo journalctl -u unbounded-agent --no-pager -l" > logs/vm-unbounded-agent.log 2>/dev/null || true
        "${SSH[@]}" "sudo journalctl -u unbounded-agent-daemon --no-pager -l" > logs/vm-unbounded-agent-daemon.log 2>/dev/null || true
        cp "${VM_DIR}/machina-controller.log" logs/machina-controller.log 2>/dev/null || true
        "${SSH[@]}" "sudo machinectl list --no-pager" > logs/vm-machines.txt 2>/dev/null || true
        for MACHINE in kube1 kube2; do
          "${SSH[@]}" "sudo journalctl -M ${MACHINE} --no-pager -l" > "logs/nspawn-${MACHINE}-journal.log" 2>/dev/null || true
          "${SSH[@]}" "sudo journalctl -M ${MACHINE} -u kubelet --no-pager -l" > "logs/nspawn-${MACHINE}-kubelet.log" 2>/dev/null || true
          "${SSH[@]}" "sudo journalctl -M ${MACHINE} -u containerd --no-pager -l" > "logs/nspawn-${MACHINE}-containerd.log" 2>/dev/null || true
          "${SSH[@]}" "sudo machinectl status ${MACHINE} --no-pager" > "logs/vm-machine-${MACHINE}-status.txt" 2>/dev/null || true
        done

    # Best-effort snapshot of cluster objects and control-plane logs.
    - name: Collect cluster state
      if: always()
      run: |
        mkdir -p logs
        KIND_CONTAINER="${KIND_CLUSTER_NAME}-control-plane"
        kubectl get nodes -o wide > logs/nodes.txt 2>&1 || true
        kubectl describe nodes > logs/nodes-describe.txt 2>&1 || true
        kubectl get pods -A -o wide > logs/pods.txt 2>&1 || true
        kubectl get events -A --sort-by='.lastTimestamp' > logs/events.txt 2>&1 || true
        kubectl get machines -o wide > logs/machines.txt 2>&1 || true
        kubectl get machines -o yaml > logs/machines-full.yaml 2>&1 || true
        kubectl get machineconfigurations -o wide > logs/machineconfigurations.txt 2>&1 || true
        kubectl get machineconfigurations -o yaml > logs/machineconfigurations-full.yaml 2>&1 || true
        kubectl get machineconfigurationversions -o wide > logs/machineconfigurationversions.txt 2>&1 || true
        kubectl get machineconfigurationversions -o yaml > logs/machineconfigurationversions-full.yaml 2>&1 || true
        kubectl get machineoperations -o wide > logs/machineoperations.txt 2>&1 || true
        kubectl get machineoperations -o yaml > logs/machineoperations-full.yaml 2>&1 || true
        docker exec "${KIND_CONTAINER}" journalctl -u kubelet --no-pager -l > logs/kind-kubelet.log 2>&1 || true
        # Resolve the kube-apiserver container ID first so that `crictl logs`
        # is never invoked without an argument when no container is found.
        APISERVER_ID="$(docker exec "${KIND_CONTAINER}" crictl ps -a --name kube-apiserver -q 2>/dev/null | head -1 || true)"
        if [[ -n "${APISERVER_ID}" ]]; then
          docker exec "${KIND_CONTAINER}" crictl logs "${APISERVER_ID}" > logs/kube-apiserver.log 2>&1 || true
        else
          echo "kube-apiserver container not found in ${KIND_CONTAINER}" > logs/kube-apiserver.log
        fi
        kubectl get csr -o wide > logs/csrs.txt 2>&1 || true
        kubectl describe csr > logs/csrs-describe.txt 2>&1 || true
        kubectl describe pods -n e2e-workload-test > logs/workload-pods-describe.txt 2>&1 || true
        kubectl logs -n e2e-workload-test --all-containers --prefix e2e-hello > logs/workload-hello.log 2>&1 || true
        kubectl logs -n e2e-workload-test --all-containers --prefix e2e-dns-test > logs/workload-dns.log 2>&1 || true

    - name: Upload logs
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
      if: always()
      with:
        name: agent-config-e2e-logs
        path: logs/
        retention-days: 30

    - name: Cleanup
      if: always()
      run: python3 ./hack/agent/e2e-kind/e2e.py --verbose cleanup
Loading
Loading