Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 115 additions & 0 deletions .github/actions/agent-e2e-kind-control-plane/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Composite action that prepares a runner for the agent e2e tests:
#   1. enables KVM and installs QEMU tooling,
#   2. creates a Kind cluster and patches kindnet so the control plane is
#      reachable by IP,
#   3. creates either the default e2e VM or only the VM bridge,
#   4. attaches the Kind control-plane container to the VM bridge.
name: Setup agent e2e Kind control plane
description: Set up KVM, dependencies, a Kind control plane, and VM bridge networking for agent e2e tests.
inputs:
  cluster-name:
    description: Kind cluster name.
    required: true
  # Used as "<vm-subnet>.2/24" for the Kind container's address on the VM
  # bridge, so this is the first three octets of the subnet.
  vm-subnet:
    description: VM subnet prefix.
    required: true
  # Compared as the string 'true'; any other value only creates the VM bridge.
  create-vm:
    description: Whether to launch the default e2e VM.
    required: false
    default: "true"
runs:
  using: composite
  steps:
    # Install a udev rule that sets mode 0666 on the kvm device node, then
    # reload and re-trigger udev so the rule applies immediately.
    - name: Enable KVM
      shell: bash
      run: |
        echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
        sudo udevadm control --reload-rules
        sudo udevadm trigger --name-match=kvm

    - name: Set up Go
      uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
      with:
        go-version-file: go.mod

    - name: Install system dependencies
      shell: bash
      run: |
        sudo apt-get update
        sudo apt-get install -y --no-install-recommends \
          qemu-system-x86 qemu-utils genisoimage \
          iptables

    - name: Create Kind cluster
      uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc # v1.14.0
      with:
        cluster_name: ${{ inputs['cluster-name'] }}
        version: v0.29.0

    - name: Configure Kind cluster networking for VM
      shell: bash
      env:
        KIND_CLUSTER_NAME: ${{ inputs['cluster-name'] }}
      run: |
        set -euo pipefail
        KIND_CONTAINER="${KIND_CLUSTER_NAME}-control-plane"
        KIND_IP=$(docker inspect "${KIND_CONTAINER}" \
          --format '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}')
        if [[ -z "${KIND_IP}" ]]; then
          echo "::error::Could not determine Kind control-plane container IP"
          exit 1
        fi
        echo "[INFO] Kind control-plane IP: ${KIND_IP}"
        BRIDGE="virbr-e2e"

        # Allow forwarding between the VM bridge and Docker bridge.
        sudo iptables -I FORWARD -i "${BRIDGE}" -j ACCEPT
        sudo iptables -I FORWARD -o "${BRIDGE}" -j ACCEPT

        # Docker may insert raw PREROUTING DROP rules that block non-Docker
        # traffic to container IPs. Insert an ACCEPT so the VM can reach the
        # Kind API server.
        sudo iptables -t raw -I PREROUTING -i "${BRIDGE}" -j ACCEPT

        # Patch kindnet so CONTROL_PLANE_ENDPOINT uses the container IP instead
        # of the hostname (which is unresolvable from the VM).
        echo "[INFO] Patching kindnet DaemonSet for VM-reachable control plane endpoint..."
        PATCH=$(cat <<EOF
        {
          "spec": {
            "template": {
              "spec": {
                "containers": [{
                  "name": "kindnet-cni",
                  "env": [
                    {"name": "CONTROL_PLANE_ENDPOINT", "value": "${KIND_IP}:6443"}
                  ]
                }]
              }
            }
          }
        }
        EOF
        )
        kubectl -n kube-system patch daemonset kindnet \
          --type=strategic -p "${PATCH}"

        echo "[INFO] Waiting for kindnet rollout..."
        kubectl -n kube-system rollout status daemonset/kindnet --timeout=60s

    # Exactly one of the next two steps runs, selected by the create-vm input.
    - name: Create QEMU VM
      if: ${{ inputs['create-vm'] == 'true' }}
      shell: bash
      run: python3 ./hack/agent/e2e-kind/e2e.py --verbose create-vm

    - name: Create VM bridge
      if: ${{ inputs['create-vm'] != 'true' }}
      shell: bash
      run: python3 ./hack/agent/e2e-kind/e2e.py --verbose create-vm-bridge

    - name: Attach Kind container to VM bridge
      shell: bash
      env:
        KIND_CLUSTER_NAME: ${{ inputs['cluster-name'] }}
        VM_SUBNET: ${{ inputs['vm-subnet'] }}
      run: |
        set -euo pipefail
        KIND_CONTAINER="${KIND_CLUSTER_NAME}-control-plane"
        BRIDGE="virbr-e2e"

        # Connect the Kind container directly to the VM bridge via a veth
        # pair so that the VM subnet is directly reachable at L2. This is
        # required because kindnet adds routes of the form
        # "10.244.x.0/24 via <nodeIP>" and the kernel rejects these when
        # the gateway is only reachable via an indirect route.
        echo "[INFO] Attaching Kind container to ${BRIDGE} bridge..."
        KIND_PID=$(docker inspect "${KIND_CONTAINER}" --format '{{.State.Pid}}')
        # Docker reports Pid 0 for a container that is not running; fail with
        # a clear message instead of an opaque ip/nsenter error further down.
        if [[ -z "${KIND_PID}" || "${KIND_PID}" == "0" ]]; then
          echo "::error::Kind control-plane container ${KIND_CONTAINER} is not running"
          exit 1
        fi
        # Remove any stale veth from a previous attempt before recreating it.
        sudo ip link delete veth-kind-e2e 2>/dev/null || true
        sudo ip link add veth-kind-e2e type veth peer name eth-e2e
        sudo ip link set veth-kind-e2e master "${BRIDGE}"
        sudo ip link set veth-kind-e2e up
        # Move the peer end into the container's network namespace and
        # give it the .2 address on the VM subnet.
        sudo ip link set eth-e2e netns "${KIND_PID}"
        sudo nsenter -t "${KIND_PID}" -n ip addr add "${VM_SUBNET}.2/24" dev eth-e2e
        sudo nsenter -t "${KIND_PID}" -n ip link set eth-e2e up
25 changes: 25 additions & 0 deletions .github/actions/agent-e2e-kind-logs/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Composite action that gathers diagnostics via the e2e helper script and
# uploads the resulting logs/ directory as a workflow artifact.
name: Collect agent e2e Kind logs
description: Collect and upload diagnostics for agent e2e Kind jobs.
inputs:
  artifact-name:
    description: Name of the uploaded log artifact.
    required: true
  # Forwarded to the collector as the COLLECT_NODE_CONFIG_LOGS env variable.
  node-configs:
    description: Whether to collect logs for discovered node config scenarios.
    required: false
    default: "false"
runs:
  using: composite
  steps:
    - name: Collect logs
      shell: bash
      env:
        COLLECT_NODE_CONFIG_LOGS: ${{ inputs['node-configs'] }}
      run: python3 ./hack/agent/e2e-kind/e2e.py --verbose collect-logs

    # Steps inside a composite action default to `if: success()`, so without
    # an explicit condition a failing collector would skip the upload and
    # discard whatever was already written to logs/. Upload unconditionally.
    - name: Upload logs
      if: always()
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
      with:
        name: ${{ inputs['artifact-name'] }}
        path: logs/
        retention-days: 30
16 changes: 16 additions & 0 deletions .github/actions/agent-e2e-machina-setup/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Composite action wrapping the three machina-related subcommands of the
# e2e helper script, executed in order: install the Machine CRD, start the
# controller, then validate it.
name: Set up agent e2e machina resources
description: >-
  Install Machine CRDs and start/validate the machina controller
  for agent e2e tests.
runs:
  using: composite
  steps:
    - name: Install Machine CRD
      run: python3 ./hack/agent/e2e-kind/e2e.py --verbose install-machine-crd
      shell: bash

    - name: Start machina controller
      run: python3 ./hack/agent/e2e-kind/e2e.py --verbose start-machina-controller
      shell: bash

    - name: Validate machina controller
      run: python3 ./hack/agent/e2e-kind/e2e.py --verbose validate-machina-controller
      shell: bash
214 changes: 45 additions & 169 deletions .github/workflows/agent-e2e-kind.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,110 +60,14 @@ jobs:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Enable KVM
run: |
echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
sudo udevadm control --reload-rules
sudo udevadm trigger --name-match=kvm

- name: Set up Go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
- name: Set up test control plane
uses: ./.github/actions/agent-e2e-kind-control-plane
with:
go-version-file: go.mod
cluster-name: ${{ env.KIND_CLUSTER_NAME }}
vm-subnet: ${{ env.VM_SUBNET }}

- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
qemu-system-x86 qemu-utils genisoimage \
iptables

- name: Create Kind cluster
uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc # v1.14.0
with:
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
version: v0.29.0

- name: Configure Kind cluster networking for VM
run: |
set -euo pipefail
KIND_CONTAINER="${KIND_CLUSTER_NAME}-control-plane"
KIND_IP=$(docker inspect "${KIND_CONTAINER}" \
--format '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}')
if [[ -z "${KIND_IP}" ]]; then
echo "::error::Could not determine Kind control-plane container IP"
exit 1
fi
echo "[INFO] Kind control-plane IP: ${KIND_IP}"
BRIDGE="virbr-e2e"

# Allow forwarding between the VM bridge and Docker bridge.
sudo iptables -I FORWARD -i "${BRIDGE}" -j ACCEPT
sudo iptables -I FORWARD -o "${BRIDGE}" -j ACCEPT

# Docker may insert raw PREROUTING DROP rules that block non-Docker
# traffic to container IPs. Insert an ACCEPT so the VM can reach the
# Kind API server.
sudo iptables -t raw -I PREROUTING -i "${BRIDGE}" -j ACCEPT

# Patch kindnet so CONTROL_PLANE_ENDPOINT uses the container IP instead
# of the hostname (which is unresolvable from the VM).
echo "[INFO] Patching kindnet DaemonSet for VM-reachable control plane endpoint..."
PATCH=$(cat <<EOF
{
"spec": {
"template": {
"spec": {
"containers": [{
"name": "kindnet-cni",
"env": [
{"name": "CONTROL_PLANE_ENDPOINT", "value": "${KIND_IP}:6443"}
]
}]
}
}
}
}
EOF
)
kubectl -n kube-system patch daemonset kindnet \
--type=strategic -p "${PATCH}"

echo "[INFO] Waiting for kindnet rollout..."
kubectl -n kube-system rollout status daemonset/kindnet --timeout=60s

- name: Create QEMU VM
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose create-vm

- name: Attach Kind container to VM bridge
run: |
set -euo pipefail
KIND_CONTAINER="${KIND_CLUSTER_NAME}-control-plane"
BRIDGE="virbr-e2e"

# Connect the Kind container directly to the VM bridge via a veth
# pair so that the VM subnet is directly reachable at L2. This is
# required because kindnet adds routes of the form
# "10.244.x.0/24 via <nodeIP>" and the kernel rejects these when
# the gateway is only reachable via an indirect route.
echo "[INFO] Attaching Kind container to ${BRIDGE} bridge..."
KIND_PID=$(docker inspect "${KIND_CONTAINER}" --format '{{.State.Pid}}')
sudo ip link delete veth-kind-e2e 2>/dev/null || true
sudo ip link add veth-kind-e2e type veth peer name eth-e2e
sudo ip link set veth-kind-e2e master "${BRIDGE}"
sudo ip link set veth-kind-e2e up
sudo ip link set eth-e2e netns "${KIND_PID}"
sudo nsenter -t "${KIND_PID}" -n ip addr add "${VM_SUBNET}.2/24" dev eth-e2e
sudo nsenter -t "${KIND_PID}" -n ip link set eth-e2e up

- name: Install Machine CRD
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose install-machine-crd

- name: Start machina controller
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose start-machina-controller

- name: Validate machina controller
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose validate-machina-controller
- name: Set up machina resources
uses: ./.github/actions/agent-e2e-machina-setup

# No Machine CR pre-created; agent self-registers during bootstrap.
- name: Run agent to join VM to cluster
Expand Down Expand Up @@ -227,77 +131,49 @@ jobs:
- name: Validate node repave upgrade
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose validate-node-repave-upgrade

- name: Collect VM logs
- name: Collect logs
if: always()
run: |
mkdir -p logs
VM_DIR=".vm-e2e"
# Collect VM serial console log
cp "${VM_DIR}/${VM_NAME}.log" logs/vm-serial.log 2>/dev/null || true
SSH="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 -i ${VM_DIR}/ssh/id_ed25519 ubuntu@${VM_IP}"
# Collect full journal from the VM host (best-effort)
$SSH "sudo journalctl --no-pager -l" > logs/vm-journal.log 2>/dev/null || true
# Collect unbounded-agent logs from the VM host
$SSH "sudo journalctl -u unbounded-agent --no-pager -l" > logs/vm-unbounded-agent.log 2>/dev/null || true
# Collect unbounded-agent-daemon logs
$SSH "sudo journalctl -u unbounded-agent-daemon --no-pager -l" > logs/vm-unbounded-agent-daemon.log 2>/dev/null || true
cp ".vm-e2e/machina-controller.log" logs/machina-controller.log 2>/dev/null || true
# Kubelet and containerd run inside the nspawn container, not on
# the host. Use 'journalctl -M <machine>' to read the container
# journal, and fall back to machinectl shell if that doesn't work.
# The nspawn machine name is fixed to kube1/kube2 (decoupled from
# the Kubernetes node name).
$SSH "sudo machinectl list --no-pager" > logs/vm-machines.txt 2>/dev/null || true
for MACHINE in kube1 kube2; do
$SSH "sudo journalctl -M ${MACHINE} --no-pager -l" > logs/nspawn-${MACHINE}-journal.log 2>/dev/null || true
$SSH "sudo journalctl -M ${MACHINE} -u kubelet --no-pager -l" > logs/nspawn-${MACHINE}-kubelet.log 2>/dev/null || true
$SSH "sudo journalctl -M ${MACHINE} -u containerd --no-pager -l" > logs/nspawn-${MACHINE}-containerd.log 2>/dev/null || true
$SSH "sudo machinectl status ${MACHINE} --no-pager" > logs/vm-machine-${MACHINE}-status.txt 2>/dev/null || true
$SSH "sudo machinectl shell ${MACHINE} /usr/bin/systemctl list-units --no-pager" > logs/nspawn-${MACHINE}-units.txt 2>/dev/null || true
done

- name: Collect cluster state
uses: ./.github/actions/agent-e2e-kind-logs
with:
artifact-name: agent-e2e-kind-logs

- name: Cleanup
if: always()
run: |
mkdir -p logs
kubectl get nodes -o wide > logs/nodes.txt 2>&1 || true
kubectl describe nodes > logs/nodes-describe.txt 2>&1 || true
kubectl get pods -A -o wide > logs/pods.txt 2>&1 || true
kubectl get events -A --sort-by='.lastTimestamp' > logs/events.txt 2>&1 || true
# Collect Machine CRs (if CRD is installed)
kubectl get machines -o wide > logs/machines.txt 2>&1 || true
kubectl get machines -o yaml > logs/machines-full.yaml 2>&1 || true
kubectl get machineconfigurations -o wide > logs/machineconfigurations.txt 2>&1 || true
kubectl get machineconfigurations -o yaml > logs/machineconfigurations-full.yaml 2>&1 || true
kubectl get machineconfigurationversions -o wide > logs/machineconfigurationversions.txt 2>&1 || true
kubectl get machineconfigurationversions -o yaml > logs/machineconfigurationversions-full.yaml 2>&1 || true
kubectl get machineoperations -o wide > logs/machineoperations.txt 2>&1 || true
kubectl get machineoperations -o yaml > logs/machineoperations-full.yaml 2>&1 || true
# Collect kubelet logs from the Kind control-plane
docker exec kind-control-plane journalctl -u kubelet --no-pager -l > logs/kind-kubelet.log 2>&1 || true
# Collect kube-apiserver logs (critical for diagnosing TLS bootstrap / RBAC issues)
docker exec kind-control-plane crictl logs $(docker exec kind-control-plane crictl ps -a --name kube-apiserver -q 2>/dev/null | head -1) > logs/kube-apiserver.log 2>&1 || true
# Dump all ClusterRoleBindings to see what RBAC kubeadm/Kind created
kubectl get clusterrolebindings -o wide > logs/clusterrolebindings.txt 2>&1 || true
kubectl get clusterrolebindings -o yaml > logs/clusterrolebindings-full.yaml 2>&1 || true
# List CSRs to see if the kubelet attempted TLS bootstrap
kubectl get csr -o wide > logs/csrs.txt 2>&1 || true
kubectl describe csr > logs/csrs-describe.txt 2>&1 || true
# Dump bootstrap token secrets (redact token-secret values)
kubectl get secrets -n kube-system -l 'kubernetes.io/legacy-token-last-used' -o wide > logs/bootstrap-tokens.txt 2>&1 || true
kubectl get secrets -n kube-system --field-selector type=bootstrap.kubernetes.io/token -o yaml > logs/bootstrap-token-secrets.yaml 2>&1 || true
# Collect workload test pod details
kubectl describe pods -n e2e-workload-test > logs/workload-pods-describe.txt 2>&1 || true
kubectl logs -n e2e-workload-test --all-containers --prefix e2e-hello > logs/workload-hello.log 2>&1 || true
kubectl logs -n e2e-workload-test --all-containers --prefix e2e-dns-test > logs/workload-dns.log 2>&1 || true

- name: Upload logs
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose cleanup

agent-config-e2e:
name: agent config e2e
runs-on: ubuntu-24.04
timeout-minutes: 60
env:
KIND_CLUSTER_NAME: agent-config-e2e
VM_NAME: agent-config-e2e
VM_SUBNET: "192.168.110"
VM_IP: "192.168.110.10"
AGENT_MACHINE_NAME: agent-config-e2e
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Set up test control plane
uses: ./.github/actions/agent-e2e-kind-control-plane
with:
cluster-name: ${{ env.KIND_CLUSTER_NAME }}
vm-subnet: ${{ env.VM_SUBNET }}
create-vm: "false"

- name: Set up machina resources
uses: ./.github/actions/agent-e2e-machina-setup

- name: Discover and validate node configs
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose validate-node-configs

- name: Collect logs
if: always()
uses: ./.github/actions/agent-e2e-kind-logs
with:
name: agent-e2e-kind-logs
path: logs/
retention-days: 30
artifact-name: agent-config-e2e-logs
node-configs: "true"

- name: Cleanup
if: always()
Expand Down
Loading
Loading