Skip to content

Commit 100ac46

Browse files
committed
VM cleanup validation and cleanup
Signed-off-by: pruthvitd <[email protected]>
1 parent 2d314d2 commit 100ac46

File tree

4 files changed

+292
-146
lines changed

4 files changed

+292
-146
lines changed

config/rbac/role.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,14 @@ rules:
235235
- patch
236236
- update
237237
- watch
238+
- apiGroups:
239+
- kubevirt.io
240+
resources:
241+
- virtualmachineinstances
242+
verbs:
243+
- get
244+
- list
245+
- watch
238246
- apiGroups:
239247
- kubevirt.io
240248
resources:
@@ -243,6 +251,7 @@ rules:
243251
- delete
244252
- get
245253
- list
254+
- patch
246255
- watch
247256
- apiGroups:
248257
- multicluster.x-k8s.io

internal/controller/util/vm_util.go

Lines changed: 91 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ package util
33
import (
44
"context"
55
"fmt"
6+
"strings"
67

78
"github.com/go-logr/logr"
8-
k8serrors "k8s.io/apimachinery/pkg/api/errors"
99
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1010
"k8s.io/apimachinery/pkg/runtime/schema"
1111
"k8s.io/apimachinery/pkg/types"
@@ -16,8 +16,8 @@ import (
1616
)
1717

1818
const (
19-
KindVirtualMachine = "VirtualMachine"
20-
KubeVirtAPIVersion = "kubevirt.io/v1"
19+
KindVirtualMachine = "VirtualMachine"
20+
KubeVirtAPIVersionPrefix = "kubevirt.io/" // prefix match on group; version follows (e.g., v1)
2121
)
2222

2323
func ListVMsByLabelSelector(
@@ -64,37 +64,26 @@ func ListVMsByVMNamespace(
6464
log logr.Logger,
6565
vmNamespaceList []string,
6666
vmList []string,
67-
) ([]string, error) {
68-
var foundVMList []string
69-
70-
var notFoundErr error
71-
67+
) []virtv1.VirtualMachine {
7268
foundVM := &virtv1.VirtualMachine{}
7369

70+
foundVMList := make([]virtv1.VirtualMachine, 0, len(vmList))
7471
for _, ns := range vmNamespaceList {
7572
for _, vm := range vmList {
7673
vmLookUp := types.NamespacedName{Namespace: ns, Name: vm}
7774
if err := apiReader.Get(ctx, vmLookUp, foundVM); err != nil {
78-
if !k8serrors.IsNotFound(err) {
79-
return nil, err
80-
}
81-
82-
if notFoundErr == nil {
83-
notFoundErr = err
84-
}
85-
8675
continue
8776
}
8877

89-
foundVMList = append(foundVMList, foundVM.Name)
78+
foundVMList = append(foundVMList, *foundVM)
9079
}
9180
}
9281

9382
if len(foundVMList) > 0 {
94-
return foundVMList, nil
83+
return foundVMList
9584
}
9685

97-
return nil, notFoundErr
86+
return nil
9887
}
9988

10089
// IsVMDeletionInProgress returns true if any listed KubeVirt VM within the given protected NS is in deletion state.
@@ -121,6 +110,8 @@ func IsVMDeletionInProgress(ctx context.Context,
121110

122111
if !foundVM.GetDeletionTimestamp().IsZero() {
123112
// Deletion of vm has been requested
113+
log.Info("VM deletion is in progress", "VM", vm)
114+
124115
return true
125116
}
126117
}
@@ -134,65 +125,108 @@ func IsVMDeletionInProgress(ctx context.Context,
134125
func DeleteVMs(
135126
ctx context.Context,
136127
k8sclient client.Client,
128+
foundVMs []virtv1.VirtualMachine,
137129
vmList []string,
138130
vmNamespaceList []string,
139131
log logr.Logger,
140132
) error {
141-
for _, ns := range vmNamespaceList {
142-
for _, vmName := range vmList {
143-
vm := &virtv1.VirtualMachine{}
144-
key := client.ObjectKey{Name: vmName, Namespace: ns}
133+
for _, vm := range foundVMs {
134+
ns := vm.GetNamespace()
145135

146-
if err := k8sclient.Get(ctx, key, vm); err != nil {
147-
log.Error(err, "Failed to get VM", "namespace", ns, "name", vmName)
136+
vmName := vm.GetName()
148137

149-
return fmt.Errorf("failed to get VM %s/%s: %w", ns, vmName, err)
150-
}
151-
152-
if err := k8sclient.Delete(ctx, vm); err != nil {
153-
log.Error(err, "Failed to delete VM", "namespace", ns, "name", vmName)
138+
// Foreground deletion option
139+
deleteOpts := &client.DeleteOptions{
140+
GracePeriodSeconds: nil,
141+
PropagationPolicy: func() *metav1.DeletionPropagation {
142+
p := metav1.DeletePropagationForeground
154143

155-
return fmt.Errorf("failed to delete VM %s/%s: %w", ns, vmName, err)
156-
}
144+
return &p
145+
}(),
146+
}
147+
if err := k8sclient.Delete(ctx, &vm, deleteOpts); err != nil {
148+
log.Error(err, "Failed to delete VM", "namespace", ns, "name", vmName)
157149

158-
log.Info("Deleted VM successfully", "namespace", ns, "name", vmName)
150+
return fmt.Errorf("failed to delete VM %s/%s: %w", ns, vmName, err)
159151
}
152+
153+
log.Info("Deleted VMs successfully", "from namespace", ns, "VM name", vmName)
160154
}
161155

162156
return nil
163157
}
164158

165-
// IsOwnedByVM recursively traverses ownerReferences until it finds a VirtualMachine.
166-
func IsOwnedByVM(ctx context.Context, c client.Client, obj client.Object,
167-
owners []metav1.OwnerReference, log logr.Logger,
168-
) (string, error) {
169-
for _, owner := range owners {
170-
if owner.Kind == KindVirtualMachine && owner.APIVersion == KubeVirtAPIVersion {
171-
return owner.Name, nil // Found VM root
172-
}
159+
// IsOwnedByVM walks the owner chain and returns the VM metadata object if found.
160+
// It prefers KubeVirt VM owners (kind=VirtualMachine, apigroup starts with kubevirt.io/)
161+
// Assuming all the owners are from same namespace
162+
// Typical KubeVirt ownership depth (PVC→DV→VM or PVC→VMI→VM or virt-launcher-pod->VMI->VM)
163+
func IsOwnedByVM(
164+
ctx context.Context,
165+
c client.Client,
166+
obj client.Object,
167+
log logr.Logger,
168+
) (client.Object, error) {
169+
owners := obj.GetOwnerReferences()
170+
// Breadth-first/flat traversal to reduce cognitive complexity
171+
type queued struct {
172+
ns string
173+
owner metav1.OwnerReference
174+
}
175+
176+
q := make([]queued, 0, len(owners))
177+
for _, o := range owners {
178+
q = append(q, queued{ns: obj.GetNamespace(), owner: o})
179+
}
173180

174-
// Fetch only metadata of the owner
175-
ownerMeta := &metav1.PartialObjectMetadata{}
176-
ownerMeta.SetGroupVersionKind(schema.FromAPIVersionAndKind(owner.APIVersion, owner.Kind))
181+
for len(q) > 0 {
182+
cur := q[0]
183+
q = q[1:]
177184

178-
if err := c.Get(ctx, client.ObjectKey{Namespace: obj.GetNamespace(), Name: owner.Name}, ownerMeta); err != nil {
179-
log.Info("Failed to fetch owner", "error", err)
185+
// Try fetching only the owner's metadata
186+
ownerMeta, err := fetchPartialMeta(ctx, c, cur.ns, cur.owner)
187+
if err != nil {
188+
log.Info("Failed to fetch owner", "gvk", cur.owner.APIVersion+"/"+cur.owner.Kind, "name", cur.owner.Name, "err", err)
180189

181-
continue // skip if not found
190+
continue
182191
}
183192

184-
// Continue traversal with the owner
185-
// Recursively check its ownerReferences
186-
obj = ownerMeta
193+
if ownerMeta.GetUID() != cur.owner.UID {
194+
// UID mismatch; skip
195+
continue
196+
}
187197

188-
nestedOwners := obj.GetOwnerReferences()
189-
if len(nestedOwners) > 0 {
190-
vmName, err := IsOwnedByVM(ctx, c, obj, nestedOwners, log)
191-
if err == nil {
192-
return vmName, nil
193-
}
198+
// If this owner is a KubeVirt VM, return it
199+
if isKubeVirtVM(cur.owner) {
200+
return ownerMeta, nil
201+
}
202+
203+
// Otherwise, enqueue its parents (same namespace assumption for KubeVirt chain)
204+
nestedOwners := ownerMeta.GetOwnerReferences()
205+
for _, nestedOwner := range nestedOwners {
206+
q = append(q, queued{ns: cur.ns, owner: nestedOwner})
194207
}
195208
}
196209

197-
return "", fmt.Errorf("no VM owner found")
210+
return nil, fmt.Errorf("no VM owner found")
211+
}
212+
213+
func isKubeVirtVM(o metav1.OwnerReference) bool {
214+
return o.Kind == KindVirtualMachine && strings.HasPrefix(o.APIVersion, KubeVirtAPIVersionPrefix)
215+
}
216+
217+
// Fetch only metadata of the owner
218+
func fetchPartialMeta(
219+
ctx context.Context,
220+
c client.Client,
221+
ns string,
222+
o metav1.OwnerReference,
223+
) (*metav1.PartialObjectMetadata, error) {
224+
objMeta := &metav1.PartialObjectMetadata{}
225+
objMeta.SetGroupVersionKind(schema.FromAPIVersionAndKind(o.APIVersion, o.Kind))
226+
227+
if err := c.Get(ctx, client.ObjectKey{Namespace: ns, Name: o.Name}, objMeta); err != nil {
228+
return nil, err
229+
}
230+
231+
return objMeta, nil
198232
}

internal/controller/volumereplicationgroup_controller.go

Lines changed: 17 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"maps"
1111
"reflect"
1212
"slices"
13+
"strings"
1314
"sync"
1415
"time"
1516

@@ -28,6 +29,7 @@ import (
2829
"k8s.io/apimachinery/pkg/types"
2930
"k8s.io/apimachinery/pkg/util/sets"
3031
"k8s.io/client-go/util/workqueue"
32+
virtv1 "kubevirt.io/api/core/v1"
3133
ctrl "sigs.k8s.io/controller-runtime"
3234
"sigs.k8s.io/controller-runtime/pkg/builder"
3335
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -398,6 +400,7 @@ func filterPVC(reader client.Reader, pvc *corev1.PersistentVolumeClaim, log logr
398400
// +kubebuilder:rbac:groups="apiextensions.k8s.io",resources=customresourcedefinitions,verbs=get;list;watch
399401
// +kubebuilder:rbac:groups=core,resources=pods/exec,verbs=create
400402
// +kubebuilder:rbac:groups="kubevirt.io",resources=virtualmachines,verbs=get;list;watch;delete
403+
// +kubebuilder:rbac:groups="kubevirt.io",resources=virtualmachineinstances,verbs=get;list;watch
401404
// +kubebuilder:rbac:groups="cdi.kubevirt.io",resources=datavolumes,verbs=get;list;watch
402405

403406
// Reconcile is part of the main kubernetes reconciliation loop which aims to
@@ -1662,34 +1665,7 @@ func (v *VRGInstance) processAsSecondary() ctrl.Result {
16621665
func (v *VRGInstance) reconcileAsSecondary() ctrl.Result {
16631666
result := ctrl.Result{}
16641667

1665-
if v.isVMRecipeProtection() {
1666-
v.log.Info("Checking VM cleanup and cross-cluster resource conflicts",
1667-
"name", v.instance.GetName(),
1668-
"namespace", v.instance.GetNamespace(),
1669-
"recipeName", "vm-recipe")
1670-
1671-
if err := v.validateVMsForStandaloneProtection(); err != nil {
1672-
result.Requeue = true
1673-
}
1674-
1675-
if !result.Requeue {
1676-
v.log.Info("VRG status observed",
1677-
"name", v.instance.GetName(),
1678-
"namespace", v.instance.GetNamespace(),
1679-
"replicationState", v.instance.Spec.ReplicationState,
1680-
"statusState", v.instance.Status.State,
1681-
"generation", v.instance.GetGeneration(),
1682-
"resourceVersion", v.instance.GetResourceVersion(),
1683-
)
1684-
1685-
if v.ShouldCleanupVMForSecondary() {
1686-
v.log.Info("Requeuing until VM cleanup is complete")
1687-
1688-
result.Requeue = true
1689-
}
1690-
}
1691-
}
1692-
1668+
result.Requeue = v.ShouldCleanupForSecondary()
16931669
result.Requeue = v.reconcileVolSyncAsSecondary() || result.Requeue
16941670
result.Requeue = v.reconcileVolRepsAsSecondary() || result.Requeue
16951671

@@ -2467,24 +2443,28 @@ func (v *VRGInstance) CheckForVMConflictOnSecondary() error {
24672443
}
24682444

24692445
func (v *VRGInstance) CheckForVMNameConflictOnSecondary(vmNamespaceList, vmList []string) error {
2470-
var foundVMs []string
2471-
2472-
var err error
2473-
if foundVMs, err = util.ListVMsByVMNamespace(v.ctx, v.reconciler.APIReader,
2474-
v.log, vmNamespaceList, vmList); err != nil {
2475-
if !k8serrors.IsNotFound(err) {
2476-
return fmt.Errorf("failed to lookup virtualmachine resources, check rbacs")
2477-
}
2446+
var foundVMs []virtv1.VirtualMachine
24782447

2448+
if foundVMs = util.ListVMsByVMNamespace(v.ctx, v.reconciler.APIReader,
2449+
v.log, vmNamespaceList, vmList); len(foundVMs) == 0 {
24792450
return nil
24802451
}
24812452

2482-
v.log.Info(fmt.Sprintf("found conflicting VM[%v] on secondary", foundVMs))
2453+
v.log.Info("found conflicting VMs on secondary", "foundVMs", vmNamesString(foundVMs))
24832454

24842455
return fmt.Errorf("protected VMs on the primary cluster share names with VMs on " +
24852456
"the secondary site, which may impact failover or recovery")
24862457
}
24872458

2459+
func vmNamesString(vms []virtv1.VirtualMachine) string {
2460+
names := make([]string, len(vms))
2461+
for i := range vms {
2462+
names[i] = vms[i].Name
2463+
}
2464+
2465+
return strings.Join(names, ", ")
2466+
}
2467+
24882468
func (v *VRGInstance) aggregateVRGNoClusterDataConflictCondition() *metav1.Condition {
24892469
var vmResourceConflict, pvcResourceConflict bool
24902470

0 commit comments

Comments
 (0)