Skip to content

Commit 064ab7a

Browse files
Remove Dependency on cluster-scoped resources (#173)
The commit introduces following changes- - Replaced clusterrolebinding with rolebinding resource. - The fsGroup: 1001 in the pod security context allows to set proper volume permissions in storage class - The anyuid SCC allows the container to run as any user ID, removing the need to explicitly set RunAsUser - Cleanup existing crb Approved-by: rhdedgar Approved-by: leseb
1 parent 90365e7 commit 064ab7a

File tree

10 files changed

+171
-101
lines changed

10 files changed

+171
-101
lines changed

config/rbac/role.yaml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,9 @@ rules:
9292
resources:
9393
- clusterrolebindings
9494
verbs:
95-
- create
9695
- delete
9796
- get
9897
- list
99-
- patch
100-
- update
101-
- watch
10298
- apiGroups:
10399
- rbac.authorization.k8s.io
104100
resources:
@@ -107,6 +103,18 @@ rules:
107103
- get
108104
- list
109105
- watch
106+
- apiGroups:
107+
- rbac.authorization.k8s.io
108+
resources:
109+
- rolebindings
110+
verbs:
111+
- create
112+
- delete
113+
- get
114+
- list
115+
- patch
116+
- update
117+
- watch
110118
- apiGroups:
111119
- security.openshift.io
112120
resources:

controllers/kubebuilder_rbac.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,12 @@ package controllers
1414
// ServiceAccount permissions - controller creates and manages service accounts for PVC permissions
1515
//+kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;list;watch;create;update;patch;delete
1616

17-
//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=get;list;watch;create;update;patch;delete
17+
//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=get;list;delete
1818
//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles,verbs=get;list;watch
1919

20+
// RoleBinding permissions - controller creates and manages role bindings for PVC permissions
21+
//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=rolebindings,verbs=get;list;watch;create;update;patch;delete
22+
2023
//+kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,verbs=use
2124
//+kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,resourceNames=anyuid,verbs=use
2225

controllers/manifests/base/kustomization.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ kind: Kustomization
44
resources:
55
- pvc.yaml
66
- serviceaccount.yaml
7-
- scc-binding.yaml
87
- service.yaml
98
- networkpolicy.yaml
109
- deployment.yaml
10+
- rolebinding.yaml
1111

1212
labels:
1313
- includeSelectors: false

controllers/manifests/base/scc-binding.yaml renamed to controllers/manifests/base/rolebinding.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
apiVersion: rbac.authorization.k8s.io/v1
2-
kind: ClusterRoleBinding
2+
kind: RoleBinding
33
metadata:
4-
name: crb
4+
name: rb
55
subjects:
66
- kind: ServiceAccount
77
name: sa

controllers/manifests/base/serviceaccount.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,3 @@ apiVersion: v1
44
kind: ServiceAccount
55
metadata:
66
name: sa
7-
annotations:
8-
# This annotation is used by OpenShift to assign the anyuid SCC
9-
# which allows the container to run as any user ID
10-
openshift.io/scc: anyuid

controllers/resource_helper.go

Lines changed: 7 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ done`, CABundleTempPath, CABundleSourceDir, fileList)
375375
},
376376
SecurityContext: &corev1.SecurityContext{
377377
AllowPrivilegeEscalation: ptr.To(false),
378-
RunAsNonRoot: ptr.To(false),
378+
RunAsNonRoot: ptr.To(true),
379379
Capabilities: &corev1.Capabilities{
380380
Drop: []corev1.Capability{"ALL"},
381381
},
@@ -390,7 +390,7 @@ func configurePodStorage(ctx context.Context, r *LlamaStackDistributionReconcile
390390
}
391391

392392
// Configure storage volumes and init containers
393-
configureStorage(instance, &podSpec, container.Image)
393+
configureStorage(instance, &podSpec)
394394

395395
// Configure TLS CA bundle (with auto-detection support)
396396
configureTLSCABundle(ctx, r, instance, &podSpec, container.Image)
@@ -405,16 +405,16 @@ func configurePodStorage(ctx context.Context, r *LlamaStackDistributionReconcile
405405
}
406406

407407
// configureStorage handles storage volume configuration.
408-
func configureStorage(instance *llamav1alpha1.LlamaStackDistribution, podSpec *corev1.PodSpec, image string) {
408+
func configureStorage(instance *llamav1alpha1.LlamaStackDistribution, podSpec *corev1.PodSpec) {
409409
if instance.Spec.Server.Storage != nil {
410-
configurePersistentStorage(instance, podSpec, image)
410+
configurePersistentStorage(instance, podSpec)
411411
} else {
412412
configureEmptyDirStorage(podSpec)
413413
}
414414
}
415415

416416
// configurePersistentStorage sets up PVC-based storage with init container for permissions.
417-
func configurePersistentStorage(instance *llamav1alpha1.LlamaStackDistribution, podSpec *corev1.PodSpec, image string) {
417+
func configurePersistentStorage(instance *llamav1alpha1.LlamaStackDistribution, podSpec *corev1.PodSpec) {
418418
// Use PVC for persistent storage
419419
podSpec.Volumes = append(podSpec.Volumes, corev1.Volume{
420420
Name: "lls-storage",
@@ -424,47 +424,6 @@ func configurePersistentStorage(instance *llamav1alpha1.LlamaStackDistribution,
424424
},
425425
},
426426
})
427-
428-
// Add init container to fix permissions on the PVC mount.
429-
mountPath := llamav1alpha1.DefaultMountPath
430-
if instance.Spec.Server.Storage.MountPath != "" {
431-
mountPath = instance.Spec.Server.Storage.MountPath
432-
}
433-
434-
commands := []string{
435-
fmt.Sprintf("mkdir -p %s 2>&1 || echo 'Warning: Could not create directory'", mountPath),
436-
fmt.Sprintf("(chown 1001:0 %s 2>&1 || echo 'Warning: Could not change ownership')", mountPath),
437-
fmt.Sprintf("ls -la %s 2>&1", mountPath),
438-
}
439-
command := strings.Join(commands, " && ")
440-
441-
initContainer := corev1.Container{
442-
Name: "update-pvc-permissions",
443-
Image: image,
444-
ImagePullPolicy: corev1.PullAlways,
445-
Command: []string{
446-
"/bin/sh",
447-
"-c",
448-
// Try to set permissions, but don't fail if we can't
449-
command,
450-
},
451-
VolumeMounts: []corev1.VolumeMount{
452-
{
453-
Name: "lls-storage",
454-
MountPath: mountPath,
455-
},
456-
},
457-
SecurityContext: &corev1.SecurityContext{
458-
RunAsUser: ptr.To(int64(0)), // Run as root to be able to change ownership
459-
RunAsGroup: ptr.To(int64(0)),
460-
AllowPrivilegeEscalation: ptr.To(false),
461-
Capabilities: &corev1.Capabilities{
462-
Drop: []corev1.Capability{"ALL"},
463-
},
464-
},
465-
}
466-
467-
podSpec.InitContainers = append(podSpec.InitContainers, initContainer)
468427
}
469428

470429
// configureEmptyDirStorage sets up temporary storage using emptyDir.
@@ -620,8 +579,9 @@ func configurePodOverrides(instance *llamav1alpha1.LlamaStackDistribution, podSp
620579
}
621580

622581
// Set fsGroup to allow write access to mounted volumes
582+
const defaultFSGroup = 1001
623583
if podSpec.SecurityContext.FSGroup == nil {
624-
podSpec.SecurityContext.FSGroup = ptr.To(int64(0))
584+
podSpec.SecurityContext.FSGroup = ptr.To(int64(defaultFSGroup))
625585
}
626586

627587
// Apply other pod overrides if specified

main.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,12 @@ func main() {
146146
os.Exit(1)
147147
}
148148

149+
// Perform one-time upgrade cleanup operations
150+
if err := cluster.PerformUpgradeCleanup(ctx, setupClient); err != nil {
151+
setupLog.Error(err, "failed to perform upgrade cleanup")
152+
os.Exit(1)
153+
}
154+
149155
if err := setupReconciler(ctx, setupClient, mgr, clusterInfo); err != nil {
150156
setupLog.Error(err, "failed to set up reconciler")
151157
os.Exit(1)

pkg/cluster/cluster.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@ import (
55
"encoding/json"
66
"fmt"
77

8+
"github.com/go-logr/logr"
89
"github.com/llamastack/llama-stack-k8s-operator/pkg/deploy"
10+
rbacv1 "k8s.io/api/rbac/v1"
11+
k8serrors "k8s.io/apimachinery/pkg/api/errors"
912
"sigs.k8s.io/controller-runtime/pkg/client"
13+
"sigs.k8s.io/controller-runtime/pkg/log"
1014
)
1115

1216
type ClusterInfo struct {
@@ -31,3 +35,70 @@ func NewClusterInfo(ctx context.Context, client client.Client, embeddedDistribut
3135
DistributionImages: distributionImages,
3236
}, nil
3337
}
38+
39+
// PerformUpgradeCleanup performs one-time cleanup operations for seamless upgrades.
40+
func PerformUpgradeCleanup(ctx context.Context, client client.Client) error {
41+
logger := log.FromContext(ctx).WithName("upgrade-cleanup")
42+
logger.Info("Starting upgrade cleanup operations")
43+
44+
// Cleanup legacy ClusterRoleBindings from cluster-scoped to namespace-scoped migration
45+
cleanupLegacyClusterRoleBindings(ctx, client, logger)
46+
logger.Info("Upgrade cleanup operations completed successfully")
47+
return nil
48+
}
49+
50+
// cleanupLegacyClusterRoleBindings removes ClusterRoleBindings from previous operator versions.
51+
func cleanupLegacyClusterRoleBindings(ctx context.Context, client client.Client, logger logr.Logger) {
52+
// List all ClusterRoleBindings
53+
clusterRoleBindingList := &rbacv1.ClusterRoleBindingList{}
54+
if err := client.List(ctx, clusterRoleBindingList); err != nil {
55+
logger.V(1).Info("Unable to list ClusterRoleBindings for cleanup, skipping legacy cleanup", "error", err)
56+
return
57+
}
58+
59+
var crbsToDelete []*rbacv1.ClusterRoleBinding
60+
for i := range clusterRoleBindingList.Items {
61+
crb := &clusterRoleBindingList.Items[i]
62+
63+
if shouldDeleteLegacyClusterRoleBinding(crb) {
64+
crbsToDelete = append(crbsToDelete, crb)
65+
}
66+
}
67+
68+
// Delete the identified ClusterRoleBindings
69+
for _, crb := range crbsToDelete {
70+
logger.Info("Cleaning up legacy ClusterRoleBinding from previous operator version",
71+
"clusterRoleBinding", crb.Name)
72+
73+
if err := client.Delete(ctx, crb); err != nil {
74+
if k8serrors.IsNotFound(err) {
75+
// Already deleted, continue
76+
continue
77+
}
78+
// Log the error but don't fail startup - the cleanup is best-effort
79+
logger.Error(err, "Failed to delete legacy ClusterRoleBinding, continuing with startup",
80+
"clusterRoleBinding", crb.Name)
81+
}
82+
}
83+
84+
if len(crbsToDelete) > 0 {
85+
logger.Info("Successfully cleaned up legacy ClusterRoleBindings", "count", len(crbsToDelete))
86+
}
87+
}
88+
89+
// shouldDeleteLegacyClusterRoleBinding determines if a ClusterRoleBinding should be deleted.
90+
func shouldDeleteLegacyClusterRoleBinding(crb *rbacv1.ClusterRoleBinding) bool {
91+
// Only delete ClusterRoleBindings that were created by our operator
92+
if managedBy, exists := crb.Labels["app.kubernetes.io/managed-by"]; !exists || managedBy != "llama-stack-operator" {
93+
return false
94+
}
95+
96+
// Check if any subjects are ServiceAccounts in namespaces (namespace-scoped)
97+
for _, subject := range crb.Subjects {
98+
if subject.Kind == "ServiceAccount" && subject.Namespace != "" {
99+
return true
100+
}
101+
}
102+
103+
return false
104+
}

0 commit comments

Comments
 (0)