Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config/overlays/odh/inferenceservice-config-patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ data:
}
deploy: |-
{
"defaultDeploymentMode": "Serverless"
"defaultDeploymentMode": "RawDeployment"
}
metricsAggregator: |-
{
Expand Down
4 changes: 3 additions & 1 deletion config/overlays/test/dsc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@ spec:
kserve:
defaultDeploymentMode: Serverless
managementState: Managed
nim:
managementState: Removed
serving:
ingressGateway:
certificate:
type: OpenshiftDefaultIngress
managementState: Managed
managementState: Removed
name: knative-serving
modelmeshserving:
managementState: Removed
Expand Down
4 changes: 4 additions & 0 deletions python/kserve/kserve/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@

KSERVE_LOGLEVEL = os.environ.get("KSERVE_LOGLEVEL", "INFO").upper()

# KServe label constants
KSERVE_LABEL_NETWORKING_VISIBILITY = "networking.kserve.io/visibility"
KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED = "exposed"

# INFERENCESERVICE credentials common constants
INFERENCESERVICE_CONFIG_MAP_NAME = "inferenceservice-config"
INFERENCESERVICE_SYSTEM_NAMESPACE = "kserve"
Expand Down
6 changes: 5 additions & 1 deletion test/e2e/batcher/test_batcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ async def test_batcher(rest_v1_client):
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(
name=service_name, namespace=KSERVE_TEST_NAMESPACE
name=service_name,
namespace=KSERVE_TEST_NAMESPACE,
labels={
constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
},
),
spec=V1beta1InferenceServiceSpec(predictor=predictor),
)
Expand Down
6 changes: 5 additions & 1 deletion test/e2e/batcher/test_batcher_custom_port.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,11 @@ async def test_batcher_custom_port(rest_v1_client):
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(
name=service_name, namespace=KSERVE_TEST_NAMESPACE
name=service_name,
namespace=KSERVE_TEST_NAMESPACE,
labels={
constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
},
),
spec=V1beta1InferenceServiceSpec(predictor=predictor),
)
Expand Down
14 changes: 8 additions & 6 deletions test/e2e/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,9 @@ def grpc_client(host, cluster_ip):
if ":" not in cluster_ip:
cluster_ip = cluster_ip + ":80"
logger.info("Cluster IP: %s", cluster_ip)
logger.info("gRPC target host: %s", host)
return InferenceGRPCClient(
cluster_ip,
verbose=True,
channel_args=[
("grpc.ssl_target_name_override", host),
],
timeout=120,
)


Expand Down Expand Up @@ -277,6 +272,7 @@ async def predict_grpc(

if model_name is None:
model_name = service_name

client = grpc_client(host, cluster_ip)

response = await client.infer(
Expand Down Expand Up @@ -312,10 +308,16 @@ def get_isvc_endpoint(isvc, network_layer: str = "istio"):
scheme = urlparse(isvc["status"]["url"]).scheme
host = urlparse(isvc["status"]["url"]).netloc
path = urlparse(isvc["status"]["url"]).path
if os.environ.get("CI_USE_ISVC_HOST") == "1":
ci_use_isvc_host = os.environ.get("CI_USE_ISVC_HOST")
logger.info(f"CI_USE_ISVC_HOST = {ci_use_isvc_host}")
logger.info(f"Host from isvc status URL = {host}")
logger.info(f"Network layer = {network_layer}")
if ci_use_isvc_host == "1":
cluster_ip = host
logger.info(f"Using external route host: {cluster_ip}")
elif network_layer == "istio" or network_layer == "istio-ingress":
cluster_ip = get_cluster_ip()
logger.info(f"Using internal cluster IP: {cluster_ip}")
elif network_layer == "envoy-gatewayapi":
cluster_ip = get_cluster_ip(
namespace="envoy-gateway-system",
Expand Down
16 changes: 13 additions & 3 deletions test/e2e/logger/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,13 @@ async def test_kserve_logger(rest_v1_client):
isvc = V1beta1InferenceService(
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(name=msg_dumper, namespace=KSERVE_TEST_NAMESPACE),
metadata=client.V1ObjectMeta(
name=msg_dumper,
namespace=KSERVE_TEST_NAMESPACE,
labels={
constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
},
),
spec=V1beta1InferenceServiceSpec(predictor=predictor),
)

Expand All @@ -65,7 +71,7 @@ async def test_kserve_logger(rest_v1_client):
min_replicas=1,
logger=V1beta1LoggerSpec(
mode="all",
url=f"http://{msg_dumper}." + KSERVE_TEST_NAMESPACE + ".svc.cluster.local",
url=f"http://{msg_dumper}-predictor." + KSERVE_TEST_NAMESPACE + ".svc.cluster.local",
),
sklearn=V1beta1SKLearnSpec(
storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
Expand All @@ -80,7 +86,11 @@ async def test_kserve_logger(rest_v1_client):
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(
name=service_name, namespace=KSERVE_TEST_NAMESPACE
name=service_name,
namespace=KSERVE_TEST_NAMESPACE,
labels={
constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
},
),
spec=V1beta1InferenceServiceSpec(predictor=predictor),
)
Expand Down
10 changes: 7 additions & 3 deletions test/e2e/predictor/test_autoscaling.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@

@pytest.mark.predictor
@pytest.mark.asyncio(scope="session")
@pytest.mark.skip("We do not test anymore with Knative")
async def test_sklearn_kserve_concurrency(rest_v1_client):
service_name = "isvc-sklearn-scale-concurrency"
predictor = V1beta1PredictorSpec(
Expand All @@ -68,7 +69,8 @@ async def test_sklearn_kserve_concurrency(rest_v1_client):
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(
name=service_name, namespace=KSERVE_TEST_NAMESPACE
name=service_name,
namespace=KSERVE_TEST_NAMESPACE
),
spec=V1beta1InferenceServiceSpec(predictor=predictor),
)
Expand All @@ -94,6 +96,7 @@ async def test_sklearn_kserve_concurrency(rest_v1_client):

@pytest.mark.predictor
@pytest.mark.asyncio(scope="session")
@pytest.mark.skip("We do not test anymore with Knative")
async def test_sklearn_kserve_rps(rest_v1_client):
service_name = "isvc-sklearn-scale-rps"
predictor = V1beta1PredictorSpec(
Expand All @@ -113,7 +116,8 @@ async def test_sklearn_kserve_rps(rest_v1_client):
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(
name=service_name, namespace=KSERVE_TEST_NAMESPACE
name=service_name,
namespace=KSERVE_TEST_NAMESPACE
),
spec=V1beta1InferenceServiceSpec(predictor=predictor),
)
Expand All @@ -137,7 +141,7 @@ async def test_sklearn_kserve_rps(rest_v1_client):
kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)


@pytest.mark.skip()
@pytest.mark.skip("Not needed to test with knative")
@pytest.mark.asyncio(scope="session")
async def test_sklearn_kserve_cpu(rest_v1_client):
service_name = "isvc-sklearn-scale-cpu"
Expand Down
8 changes: 6 additions & 2 deletions test/e2e/predictor/test_canary.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

@pytest.mark.predictor
@pytest.mark.path_based_routing
@pytest.mark.skip(reason="Canary rollouts require Knative Serving and are not supported in RawDeployment mode")
def test_canary_rollout():
service_name = "isvc-canary"
default_endpoint_spec = V1beta1InferenceServiceSpec(
Expand All @@ -52,7 +53,8 @@ def test_canary_rollout():
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(
name=service_name, namespace=KSERVE_TEST_NAMESPACE
name=service_name,
namespace=KSERVE_TEST_NAMESPACE,
),
spec=default_endpoint_spec,
)
Expand Down Expand Up @@ -96,6 +98,7 @@ def test_canary_rollout():

@pytest.mark.predictor
@pytest.mark.path_based_routing
@pytest.mark.skip(reason="Canary rollouts require Knative Serving and are not supported in RawDeployment mode")
def test_canary_rollout_runtime():
service_name = "isvc-canary-runtime"
default_endpoint_spec = V1beta1InferenceServiceSpec(
Expand All @@ -118,7 +121,8 @@ def test_canary_rollout_runtime():
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(
name=service_name, namespace=KSERVE_TEST_NAMESPACE
name=service_name,
namespace=KSERVE_TEST_NAMESPACE,
),
spec=default_endpoint_spec,
)
Expand Down
24 changes: 20 additions & 4 deletions test/e2e/predictor/test_lightgbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,11 @@ async def test_lightgbm_kserve(rest_v1_client):
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(
name=service_name, namespace=KSERVE_TEST_NAMESPACE
name=service_name,
namespace=KSERVE_TEST_NAMESPACE,
labels={
constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
},
),
spec=V1beta1InferenceServiceSpec(predictor=predictor),
)
Expand Down Expand Up @@ -93,7 +97,11 @@ async def test_lightgbm_runtime_kserve(rest_v1_client):
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(
name=service_name, namespace=KSERVE_TEST_NAMESPACE
name=service_name,
namespace=KSERVE_TEST_NAMESPACE,
labels={
constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
},
),
spec=V1beta1InferenceServiceSpec(predictor=predictor),
)
Expand Down Expand Up @@ -148,7 +156,11 @@ async def test_lightgbm_v2_runtime_mlserver(rest_v2_client):
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(
name=service_name, namespace=KSERVE_TEST_NAMESPACE
name=service_name,
namespace=KSERVE_TEST_NAMESPACE,
labels={
constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
},
),
spec=V1beta1InferenceServiceSpec(predictor=predictor),
)
Expand Down Expand Up @@ -201,7 +213,11 @@ async def test_lightgbm_v2_kserve(rest_v2_client):
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(
name=service_name, namespace=KSERVE_TEST_NAMESPACE
name=service_name,
namespace=KSERVE_TEST_NAMESPACE,
labels={
constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
},
),
spec=V1beta1InferenceServiceSpec(predictor=predictor),
)
Expand Down
6 changes: 5 additions & 1 deletion test/e2e/predictor/test_mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ async def test_mlflow_v2_runtime_kserve(rest_v2_client):
api_version=constants.KSERVE_V1BETA1,
kind=constants.KSERVE_KIND_INFERENCESERVICE,
metadata=client.V1ObjectMeta(
name=service_name, namespace=KSERVE_TEST_NAMESPACE
name=service_name,
namespace=KSERVE_TEST_NAMESPACE,
labels={
constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
},
),
spec=V1beta1InferenceServiceSpec(predictor=predictor),
)
Expand Down
59 changes: 30 additions & 29 deletions test/e2e/predictor/test_multi_container_probing.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,11 @@
kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))


def get_ksvc(k8s_client: client.CustomObjectsApi, service_name: str) -> dict:
return k8s_client.get_namespaced_custom_object(
group="serving.knative.dev",
version="v1",
namespace=KSERVE_TEST_NAMESPACE,
plural="services",
def get_deployment(k8s_client: client.AppsV1Api, service_name: str) -> client.V1Deployment:
"""Get the Kubernetes Deployment for RawDeployment mode."""
return k8s_client.read_namespaced_deployment(
name=service_name + "-predictor",
namespace=KSERVE_TEST_NAMESPACE,
)


Expand Down Expand Up @@ -132,6 +130,9 @@ async def test_multi_container_probing(rest_v1_client):
"serving.kserve.io/autoscalerClass": "none",
"serving.kserve.io/DeploymentMode": "RawDeployment",
},
labels={
constants.KSERVE_LABEL_NETWORKING_VISIBILITY: constants.KSERVE_LABEL_NETWORKING_VISIBILITY_EXPOSED,
},
),
spec=V1beta1InferenceServiceSpec(
predictor=predictor,
Expand All @@ -141,39 +142,39 @@ async def test_multi_container_probing(rest_v1_client):
kserve_client.create(isvc)
kserve_client.wait_isvc_ready(service_name, KSERVE_TEST_NAMESPACE)

# Get the Knative Service
k8s_client = client.CustomObjectsApi()
# Get the Kubernetes Deployment for RawDeployment mode
k8s_client = client.AppsV1Api()
try:
for ksvc in TimeoutSampler(
wait_timeout=15,
for deployment in TimeoutSampler(
wait_timeout=60,
sleep=2,
func=lambda: get_ksvc(k8s_client, service_name),
func=lambda: get_deployment(k8s_client, service_name),
):
# Wait for Ready condition to be True
if ksvc["status"].get("conditions"):
ready_condition = next(
(c for c in ksvc["status"]["conditions"] if c["type"] == "Ready"),
None,
)
if ready_condition and ready_condition["status"] == "True":
break
# Get latest ksvc state after Ready condition is met
ready_ksvc = get_ksvc(k8s_client, service_name)
containers = ready_ksvc["spec"]["template"]["spec"]["containers"]
# Wait for Deployment to be ready
if deployment.status.ready_replicas and deployment.status.ready_replicas > 0:
break

# Get latest deployment state after ready condition is met
ready_deployment = get_deployment(k8s_client, service_name)
containers = ready_deployment.spec.template.spec.containers

# Find containers by name
kserve_container = next(
c for c in containers if c["name"] == "kserve-container"
c for c in containers if c.name == "kserve-container"
)
kserve_agent = next(c for c in containers if c["name"] == "kserve-agent")
kserve_agent = next(c for c in containers if c.name == "kserve-agent")

# Verify kserve-container probes
assert kserve_container["livenessProbe"] is not None
assert kserve_container["readinessProbe"] is not None
assert kserve_container.liveness_probe is not None
assert kserve_container.readiness_probe is not None
logger.info("kserve-container probes verified successfully")

# Verify kserve-agent probes
assert kserve_agent["livenessProbe"] is not None
assert kserve_agent["readinessProbe"] is not None
assert kserve_agent.liveness_probe is not None
assert kserve_agent.readiness_probe is not None
logger.info("kserve-agent probes verified successfully")

kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
except TimeoutExpiredError as e:
logger.error("Timeout waiting for ksvc to be ready")
logger.error("Timeout waiting for deployment to be ready")
raise e
Loading
Loading