Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
efa737e
feat(api): add v1alpha2 API types for LLMInferenceService
KillianGolds Nov 27, 2025
206f65f
feat(api): add v1alpha1 to v1alpha2 conversion logic
KillianGolds Nov 27, 2025
f39ffee
feat(webhooks): add v1alpha2 validation webhooks
KillianGolds Nov 27, 2025
b23714c
feat(crds): add multi-version CRD support with conversion webhooks
KillianGolds Nov 27, 2025
1937e5b
feat(controller): refactor scheduler for Gateway API v1 InferencePool
KillianGolds Nov 27, 2025
09e923e
feat(controller): update router for v1alpha2 and dual InferencePool
KillianGolds Nov 27, 2025
fb535c6
feat(controller): update workload and lifecycle for v1alpha2
KillianGolds Nov 27, 2025
3859649
feat(controller): add DynamicClient and v1alpha2 resource watches
KillianGolds Nov 27, 2025
f28ce72
feat(controller): add SafeChildName utility for K8s naming constraints
KillianGolds Nov 27, 2025
70a1ad0
feat(controller): update monitoring and samples for v1alpha2
KillianGolds Nov 27, 2025
f5cafe3
feat(controller): update test fixtures for v1alpha2
KillianGolds Nov 27, 2025
b336fad
feat(main): register v1alpha2 API and dual validation webhooks
KillianGolds Nov 27, 2025
cf5cbc7
chore: regenerate client code for v1alpha1 and v1alpha2 APIs
KillianGolds Nov 27, 2025
37176b1
test: add e2e test infrastructure with API version parametrization
KillianGolds Nov 27, 2025
c3f4c69
chore: update go dependencies for GIE v1 and K8s 0.34
KillianGolds Nov 27, 2025
33cb777
fix(v1beta1): update PodSpec handling for K8s 0.34 compatibility
KillianGolds Nov 27, 2025
3765a3d
fix(v1beta1): address linter warnings for controller-runtime v0.22+
KillianGolds Nov 27, 2025
e35bab2
chore: update auto-generated files from make precommit
KillianGolds Nov 28, 2025
1cbc15e
chore(build): update Dockerfile for Go 1.24.7 compatibility
KillianGolds Nov 28, 2025
ad36904
Allow stopping LLMInferenceService (#974)
pierDipi Nov 25, 2025
fa1f211
test: update stop tests for GIE v1 and dual API version support
KillianGolds Nov 28, 2025
6d7abd6
Merge upstream/release-v0.15
KillianGolds Nov 28, 2025
9b8a9e8
test: fix integration tests for v1alpha2 CRD validation
KillianGolds Nov 28, 2025
24d832c
chore: update auto-generated files from make precommit
KillianGolds Nov 28, 2025
4fdc1d4
build: update Dockerfiles to use golang:1.24.7
KillianGolds Nov 28, 2025
2c2193f
fix(python): remove broken auto-generated test stubs
KillianGolds Nov 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build the manager binary
FROM registry.access.redhat.com/ubi9/go-toolset:1.24 AS builder
FROM golang:1.24.7 AS builder

# Copy in the go src
WORKDIR /go/src/github.com/kserve/kserve
Expand Down
2 changes: 1 addition & 1 deletion agent.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build the inference-agent binary
FROM registry.access.redhat.com/ubi9/go-toolset:1.24 AS builder
FROM golang:1.24.7 AS builder

# Copy in the go src
WORKDIR /go/src/github.com/kserve/kserve
Expand Down

Large diffs are not rendered by default.

20,688 changes: 20,688 additions & 0 deletions charts/kserve-crd-minimal/templates/serving.kserve.io_llminferenceservices.yaml

Large diffs are not rendered by default.

23 changes: 12 additions & 11 deletions charts/kserve-resources/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,18 @@ rules:
- patch
- update
- watch
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
- customresourcedefinitions/status
verbs:
- create
- get
- list
- patch
- update
- watch
- apiGroups:
- apps
resources:
Expand Down Expand Up @@ -121,17 +133,6 @@ rules:
- watch
- apiGroups:
- inference.networking.k8s.io
resources:
- inferencepools
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- inference.networking.x-k8s.io
resources:
- inferencemodels
Expand Down
74 changes: 56 additions & 18 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,27 @@ import (
"flag"
"net/http"
"os"
"time"

monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime/schema"
"knative.dev/pkg/apiextensions/storageversion"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/kserve/kserve/pkg/apis"
"github.com/kserve/kserve/pkg/apis/serving/v1alpha1"

kedav1alpha1 "github.com/kedacore/keda/v2/apis/keda/v1alpha1"
otelv1beta1 "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1"
istio_networking "istio.io/api/networking/v1alpha3"
istioclientv1beta1 "istio.io/client-go/pkg/apis/networking/v1beta1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
apixclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/kubernetes"
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
Expand All @@ -52,12 +60,11 @@ import (

routev1 "github.com/openshift/api/route/v1"

"github.com/kserve/kserve/pkg/controller/llmisvc"
llmisvcvalidation "github.com/kserve/kserve/pkg/controller/llmisvc/validation"

"github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
v1alpha1validation "github.com/kserve/kserve/pkg/apis/serving/v1alpha1/validation"
v1alpha2validation "github.com/kserve/kserve/pkg/apis/serving/v1alpha2/validation"
"github.com/kserve/kserve/pkg/apis/serving/v1beta1"
"github.com/kserve/kserve/pkg/constants"
"github.com/kserve/kserve/pkg/controller/llmisvc"
graphcontroller "github.com/kserve/kserve/pkg/controller/v1alpha1/inferencegraph"
trainedmodelcontroller "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel"
"github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel/reconcilers/modelconfig"
Expand Down Expand Up @@ -181,17 +188,11 @@ func main() {
setupLog.Info("Setting up KServe v1alpha1 scheme")
scheme := mgr.GetScheme()

if err := v1alpha1.AddToScheme(scheme); err != nil {
if err := apis.AddToScheme(scheme); err != nil {
setupLog.Error(err, "unable to add KServe v1alpha1 to scheme")
os.Exit(1)
}

setupLog.Info("Setting up KServe v1beta1 scheme")
if err := v1beta1.AddToScheme(scheme); err != nil {
setupLog.Error(err, "unable to add KServe v1beta1 to scheme")
os.Exit(1)
}

isvcConfigMap, err := v1beta1.GetInferenceServiceConfigMap(context.Background(), clientSet)
if err != nil {
setupLog.Error(err, "unable to get configmap", "name", constants.InferenceServiceConfigMapName, "namespace", constants.KServeNamespace)
Expand Down Expand Up @@ -324,10 +325,16 @@ func main() {
setupLog.Info("Setting up LLMInferenceService controller")
llmEventBroadcaster := record.NewBroadcaster()
llmEventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: clientSet.CoreV1().Events("")})
dynamicClient, err := dynamic.NewForConfig(mgr.GetConfig())
if err != nil {
setupLog.Error(err, "unable to create dynamic client")
os.Exit(1)
}
if err = (&llmisvc.LLMInferenceServiceReconciler{
Client: mgr.GetClient(),
Config: mgr.GetConfig(),
Clientset: clientSet,
DynamicClient: dynamicClient,
EventRecorder: llmEventBroadcaster.NewRecorder(scheme, corev1.EventSource{Component: "LLMInferenceServiceController"}),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "v1beta1Controller", "InferenceService")
Expand Down Expand Up @@ -415,16 +422,19 @@ func main() {
os.Exit(1)
}

llmConfigValidator := &llmisvcvalidation.LLMInferenceServiceConfigValidator{
ClientSet: clientSet,
if err = (&v1alpha1validation.LLMInferenceServiceConfigValidator{ClientSet: clientSet}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "llminferenceserviceconfig")
os.Exit(1)
}
if err = llmConfigValidator.SetupWithManager(mgr); err != nil {
if err = (&v1alpha1validation.LLMInferenceServiceValidator{}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "llminferenceservice")
os.Exit(1)
}
if err = (&v1alpha2validation.LLMInferenceServiceConfigValidator{ClientSet: clientSet}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "llminferenceserviceconfig")
os.Exit(1)
}

llmInferenceServiceValidator := &llmisvcvalidation.LLMInferenceServiceValidator{}
if err = llmInferenceServiceValidator.SetupWithManager(mgr); err != nil {
if err = (&v1alpha2validation.LLMInferenceServiceValidator{}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "llminferenceservice")
os.Exit(1)
}
Expand All @@ -442,9 +452,37 @@ func main() {
os.Exit(1)
}

ctx := signals.SetupSignalHandler()

resources := []string{
"llminferenceserviceconfigs.serving.kserve.io",
"llminferenceservices.serving.kserve.io",
}

go func() {
migrator := storageversion.NewMigrator(dynamicClient, apixclient.NewForConfigOrDie(mgr.GetConfig()))

for {
time.Sleep(10 * time.Second)

allMigrated := true
for _, resource := range resources {
if err := migrator.Migrate(ctx, schema.ParseGroupResource(resource)); err != nil {
setupLog.Error(err, "Failed to migrate", "resource", resource)
allMigrated = false
}
}
if allMigrated {
return
}

setupLog.Info("Failed to migrate all resources, retrying ...", "resources", resources)
}
}()

// Start the Cmd
setupLog.Info("Starting the Cmd.")
if err := mgr.Start(signals.SetupSignalHandler()); err != nil {
if err := mgr.Start(ctx); err != nil {
setupLog.Error(err, "unable to run the manager")
os.Exit(1)
}
Expand Down
4 changes: 4 additions & 0 deletions config/crd/full/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ resources:
- serving.kserve.io_localmodelnodes.yaml
- serving.kserve.io_llminferenceserviceconfigs.yaml
- serving.kserve.io_llminferenceservices.yaml

patches:
- path: llmisvc_conversion_patch.yaml
- path: llmisvcconfig_conversion_patch.yaml
17 changes: 17 additions & 0 deletions config/crd/full/llmisvc_conversion_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Adds conversion webhook configuration to LLMInferenceService CRD.
# Applied as a kustomize patch on top of the generated CRD; only the
# spec.conversion stanza is set here.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: llminferenceservices.serving.kserve.io
spec:
  conversion:
    # Delegate conversion between served versions to a webhook.
    strategy: Webhook
    webhook:
      clientConfig:
        service:
          name: kserve-webhook-server-service
          # NOTE(review): namespace is hard-coded; confirm that overlays
          # deploying to a different namespace override this value.
          namespace: opendatahub
          path: /convert
      # ConversionReview versions the webhook understands, in preference order.
      conversionReviewVersions:
        - v1
        - v1beta1
17 changes: 17 additions & 0 deletions config/crd/full/llmisvcconfig_conversion_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Adds conversion webhook configuration to LLMInferenceServiceConfig CRD.
# Applied as a kustomize patch on top of the generated CRD; only the
# spec.conversion stanza is set here.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: llminferenceserviceconfigs.serving.kserve.io
spec:
  conversion:
    # Delegate conversion between served versions to a webhook.
    strategy: Webhook
    webhook:
      clientConfig:
        service:
          name: kserve-webhook-server-service
          # NOTE(review): namespace is hard-coded; confirm that overlays
          # deploying to a different namespace override this value.
          namespace: opendatahub
          path: /convert
      # ConversionReview versions the webhook understands, in preference order.
      conversionReviewVersions:
        - v1
        - v1beta1
Loading
Loading