opendatahub-io · openshift-merge-bot · Nov 28, 2025 · Nov 21, 2025
diff --git a/config/overlays/odh/inferenceservice-config-patch.yaml b/config/overlays/odh/inferenceservice-config-patch.yaml
@@ -77,7 +77,7 @@ data:
     }
   deploy: |-
     {
-      "defaultDeploymentMode": "Serverless"
+      "defaultDeploymentMode": "RawDeployment"
     }
   metricsAggregator: |-
     {

diff --git a/config/overlays/test/dsc.yaml b/config/overlays/test/dsc.yaml
@@ -19,11 +19,13 @@ spec:
     kserve:
       defaultDeploymentMode: Serverless
       managementState: Managed
+      nim:
+        managementState: Removed
       serving:
         ingressGateway:
           certificate:
             type: OpenshiftDefaultIngress
-        managementState: Managed
+        managementState: Removed
         name: knative-serving
     modelmeshserving:
       managementState: Removed

diff --git a/python/kserve/kserve/constants/constants.py b/python/kserve/kserve/constants/constants.py
@@ -37,6 +37,10 @@
 
 KSERVE_LOGLEVEL = os.environ.get("KSERVE_LOGLEVEL", "INFO").upper()
 
+# KServe label constants
+KSERVE_LABEL_NETWORKING_VISIBILITY = "networking.kserve.io/visibility"
+KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED = "exposed"
+
 # INFERENCESERVICE credentials common constants
 INFERENCESERVICE_CONFIG_MAP_NAME = "inferenceservice-config"
 INFERENCESERVICE_SYSTEM_NAMESPACE = "kserve"

diff --git a/test/e2e/batcher/test_batcher.py b/test/e2e/batcher/test_batcher.py
@@ -52,7 +52,11 @@ async def test_batcher(rest_v1_client):
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
         metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            name=service_name,
+            namespace=KSERVE_TEST_NAMESPACE,
+            labels={
+                constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
+            },
         ),
         spec=V1beta1InferenceServiceSpec(predictor=predictor),
     )

diff --git a/test/e2e/batcher/test_batcher_custom_port.py b/test/e2e/batcher/test_batcher_custom_port.py
@@ -56,7 +56,11 @@ async def test_batcher_custom_port(rest_v1_client):
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
         metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            name=service_name,
+            namespace=KSERVE_TEST_NAMESPACE,
+            labels={
+                constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
+            },
         ),
         spec=V1beta1InferenceServiceSpec(predictor=predictor),
     )

diff --git a/test/e2e/common/utils.py b/test/e2e/common/utils.py
@@ -43,14 +43,9 @@ def grpc_client(host, cluster_ip):
     if ":" not in cluster_ip:
         cluster_ip = cluster_ip + ":80"
     logger.info("Cluster IP: %s", cluster_ip)
-    logger.info("gRPC target host: %s", host)
     return InferenceGRPCClient(
         cluster_ip,
         verbose=True,
-        channel_args=[
-            ("grpc.ssl_target_name_override", host),
-        ],
-        timeout=120,
     )
 
 
@@ -277,6 +272,7 @@ async def predict_grpc(
 
     if model_name is None:
         model_name = service_name
+
     client = grpc_client(host, cluster_ip)
 
     response = await client.infer(
@@ -312,10 +308,16 @@ def get_isvc_endpoint(isvc, network_layer: str = "istio"):
     scheme = urlparse(isvc["status"]["url"]).scheme
     host = urlparse(isvc["status"]["url"]).netloc
     path = urlparse(isvc["status"]["url"]).path
-    if os.environ.get("CI_USE_ISVC_HOST") == "1":
+    ci_use_isvc_host = os.environ.get("CI_USE_ISVC_HOST")
+    logger.info(f"CI_USE_ISVC_HOST = {ci_use_isvc_host}")
+    logger.info(f"Host from isvc status URL = {host}")
+    logger.info(f"Network layer = {network_layer}")
+    if ci_use_isvc_host == "1":
         cluster_ip = host
+        logger.info(f"Using external route host: {cluster_ip}")
     elif network_layer == "istio" or network_layer == "istio-ingress":
         cluster_ip = get_cluster_ip()
+        logger.info(f"Using internal cluster IP: {cluster_ip}")
     elif network_layer == "envoy-gatewayapi":
         cluster_ip = get_cluster_ip(
             namespace="envoy-gateway-system",

diff --git a/test/e2e/logger/test_logger.py b/test/e2e/logger/test_logger.py
@@ -53,7 +53,13 @@ async def test_kserve_logger(rest_v1_client):
     isvc = V1beta1InferenceService(
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
-        metadata=client.V1ObjectMeta(name=msg_dumper, namespace=KSERVE_TEST_NAMESPACE),
+        metadata=client.V1ObjectMeta(
+            name=msg_dumper,
+            namespace=KSERVE_TEST_NAMESPACE,
+            labels={
+                constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
+            },
+        ),
         spec=V1beta1InferenceServiceSpec(predictor=predictor),
     )
 
@@ -65,7 +71,7 @@ async def test_kserve_logger(rest_v1_client):
         min_replicas=1,
         logger=V1beta1LoggerSpec(
             mode="all",
-            url=f"http://{msg_dumper}." + KSERVE_TEST_NAMESPACE + ".svc.cluster.local",
+            url=f"http://{msg_dumper}-predictor." + KSERVE_TEST_NAMESPACE + ".svc.cluster.local",
         ),
         sklearn=V1beta1SKLearnSpec(
             storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
@@ -80,7 +86,11 @@ async def test_kserve_logger(rest_v1_client):
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
         metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            name=service_name,
+            namespace=KSERVE_TEST_NAMESPACE,
+            labels={
+                constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
+            },
         ),
         spec=V1beta1InferenceServiceSpec(predictor=predictor),
     )

diff --git a/test/e2e/predictor/test_autoscaling.py b/test/e2e/predictor/test_autoscaling.py
@@ -50,6 +50,7 @@
 
 @pytest.mark.predictor
 @pytest.mark.asyncio(scope="session")
+@pytest.mark.skip("We do not test anymore with Knative")
 async def test_sklearn_kserve_concurrency(rest_v1_client):
     service_name = "isvc-sklearn-scale-concurrency"
     predictor = V1beta1PredictorSpec(
@@ -68,7 +69,8 @@ async def test_sklearn_kserve_concurrency(rest_v1_client):
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
         metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            name=service_name,
+            namespace=KSERVE_TEST_NAMESPACE
         ),
         spec=V1beta1InferenceServiceSpec(predictor=predictor),
     )
@@ -94,6 +96,7 @@ async def test_sklearn_kserve_concurrency(rest_v1_client):
 
 @pytest.mark.predictor
 @pytest.mark.asyncio(scope="session")
+@pytest.mark.skip("We do not test anymore with Knative")
 async def test_sklearn_kserve_rps(rest_v1_client):
     service_name = "isvc-sklearn-scale-rps"
     predictor = V1beta1PredictorSpec(
@@ -113,7 +116,8 @@ async def test_sklearn_kserve_rps(rest_v1_client):
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
         metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            name=service_name,
+            namespace=KSERVE_TEST_NAMESPACE
         ),
         spec=V1beta1InferenceServiceSpec(predictor=predictor),
     )
@@ -137,7 +141,7 @@ async def test_sklearn_kserve_rps(rest_v1_client):
     kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
 
 
-@pytest.mark.skip()
+@pytest.mark.skip("Not needed to test with knative")
 @pytest.mark.asyncio(scope="session")
 async def test_sklearn_kserve_cpu(rest_v1_client):
     service_name = "isvc-sklearn-scale-cpu"

diff --git a/test/e2e/predictor/test_canary.py b/test/e2e/predictor/test_canary.py
@@ -33,6 +33,7 @@
 
 @pytest.mark.predictor
 @pytest.mark.path_based_routing
+@pytest.mark.skip(reason="Canary rollouts require Knative Serving and are not supported in RawDeployment mode")
 def test_canary_rollout():
     service_name = "isvc-canary"
     default_endpoint_spec = V1beta1InferenceServiceSpec(
@@ -52,7 +53,8 @@ def test_canary_rollout():
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
         metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            name=service_name,
+            namespace=KSERVE_TEST_NAMESPACE,
         ),
         spec=default_endpoint_spec,
     )
@@ -96,6 +98,7 @@ def test_canary_rollout():
 
 @pytest.mark.predictor
 @pytest.mark.path_based_routing
+@pytest.mark.skip(reason="Canary rollouts require Knative Serving and are not supported in RawDeployment mode")
 def test_canary_rollout_runtime():
     service_name = "isvc-canary-runtime"
     default_endpoint_spec = V1beta1InferenceServiceSpec(
@@ -118,7 +121,8 @@ def test_canary_rollout_runtime():
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
         metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            name=service_name,
+            namespace=KSERVE_TEST_NAMESPACE,
         ),
         spec=default_endpoint_spec,
     )

diff --git a/test/e2e/predictor/test_lightgbm.py b/test/e2e/predictor/test_lightgbm.py
@@ -54,7 +54,11 @@ async def test_lightgbm_kserve(rest_v1_client):
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
         metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            name=service_name,
+            namespace=KSERVE_TEST_NAMESPACE,
+            labels={
+                constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
+            },
         ),
         spec=V1beta1InferenceServiceSpec(predictor=predictor),
     )
@@ -93,7 +97,11 @@ async def test_lightgbm_runtime_kserve(rest_v1_client):
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
         metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            name=service_name,
+            namespace=KSERVE_TEST_NAMESPACE,
+            labels={
+                constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
+            },
         ),
         spec=V1beta1InferenceServiceSpec(predictor=predictor),
     )
@@ -148,7 +156,11 @@ async def test_lightgbm_v2_runtime_mlserver(rest_v2_client):
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
         metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            name=service_name,
+            namespace=KSERVE_TEST_NAMESPACE,
+            labels={
+                constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
+            },
         ),
         spec=V1beta1InferenceServiceSpec(predictor=predictor),
     )
@@ -201,7 +213,11 @@ async def test_lightgbm_v2_kserve(rest_v2_client):
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
         metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            name=service_name,
+            namespace=KSERVE_TEST_NAMESPACE,
+            labels={
+                constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
+            },
         ),
         spec=V1beta1InferenceServiceSpec(predictor=predictor),
     )

diff --git a/test/e2e/predictor/test_mlflow.py b/test/e2e/predictor/test_mlflow.py
@@ -61,7 +61,11 @@ async def test_mlflow_v2_runtime_kserve(rest_v2_client):
         api_version=constants.KSERVE_V1BETA1,
         kind=constants.KSERVE_KIND_INFERENCESERVICE,
         metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            name=service_name,
+            namespace=KSERVE_TEST_NAMESPACE,
+            labels={
+                constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
+            },
         ),
         spec=V1beta1InferenceServiceSpec(predictor=predictor),
     )

diff --git a/test/e2e/predictor/test_multi_container_probing.py b/test/e2e/predictor/test_multi_container_probing.py
@@ -46,13 +46,11 @@
 kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
 
 
-def get_ksvc(k8s_client: client.CustomObjectsApi, service_name: str) -> dict:
-    return k8s_client.get_namespaced_custom_object(
-        group="serving.knative.dev",
-        version="v1",
-        namespace=KSERVE_TEST_NAMESPACE,
-        plural="services",
+def get_deployment(k8s_client: client.AppsV1Api, service_name: str) -> client.V1Deployment:
+    """Read the "<service_name>-predictor" Deployment (RawDeployment mode) from the test namespace."""
+    return k8s_client.read_namespaced_deployment(
         name=service_name + "-predictor",
+        namespace=KSERVE_TEST_NAMESPACE,
     )
 
 
@@ -132,6 +130,9 @@ async def test_multi_container_probing(rest_v1_client):
                 "serving.kserve.io/autoscalerClass": "none",
                 "serving.kserve.io/DeploymentMode": "RawDeployment",
             },
+            labels={
+                constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
+            },
         ),
         spec=V1beta1InferenceServiceSpec(
             predictor=predictor,
@@ -141,39 +142,39 @@ async def test_multi_container_probing(rest_v1_client):
     kserve_client.create(isvc)
     kserve_client.wait_isvc_ready(service_name, KSERVE_TEST_NAMESPACE)
 
-    # Get the Knative Service
-    k8s_client = client.CustomObjectsApi()
+    # Get the Kubernetes Deployment for RawDeployment mode
+    k8s_client = client.AppsV1Api()
     try:
-        for ksvc in TimeoutSampler(
-            wait_timeout=15,
+        for deployment in TimeoutSampler(
+            wait_timeout=60,
             sleep=2,
-            func=lambda: get_ksvc(k8s_client, service_name),
+            func=lambda: get_deployment(k8s_client, service_name),
         ):
-            # Wait for Ready condition to be True
-            if ksvc["status"].get("conditions"):
-                ready_condition = next(
-                    (c for c in ksvc["status"]["conditions"] if c["type"] == "Ready"),
-                    None,
-                )
-                if ready_condition and ready_condition["status"] == "True":
-                    break
-            # Get latest ksvc state after Ready condition is met
-        ready_ksvc = get_ksvc(k8s_client, service_name)
-        containers = ready_ksvc["spec"]["template"]["spec"]["containers"]
+            # Stop polling once the Deployment reports at least one ready replica
+            if deployment.status.ready_replicas and deployment.status.ready_replicas > 0:
+                break
+
+        # Re-read the Deployment so the probe checks below use its latest state
+        ready_deployment = get_deployment(k8s_client, service_name)
+        containers = ready_deployment.spec.template.spec.containers
+
+        # Find containers by name
         kserve_container = next(
-            c for c in containers if c["name"] == "kserve-container"
+            c for c in containers if c.name == "kserve-container"
         )
-        kserve_agent = next(c for c in containers if c["name"] == "kserve-agent")
+        kserve_agent = next(c for c in containers if c.name == "kserve-agent")
 
         # Verify kserve-container probes
-        assert kserve_container["livenessProbe"] is not None
-        assert kserve_container["readinessProbe"] is not None
+        assert kserve_container.liveness_probe is not None
+        assert kserve_container.readiness_probe is not None
+        logger.info("kserve-container probes verified successfully")
 
         # Verify kserve-agent probes
-        assert kserve_agent["livenessProbe"] is not None
-        assert kserve_agent["readinessProbe"] is not None
+        assert kserve_agent.liveness_probe is not None
+        assert kserve_agent.readiness_probe is not None
+        logger.info("kserve-agent probes verified successfully")
 
         kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
     except TimeoutExpiredError as e:
-        logger.error("Timeout waiting for ksvc to be ready")
+        logger.error("Timeout waiting for deployment to be ready")
         raise e