[v1] vLLM 0.11.1 (#5482)

jinyan-li1 · web-flow · commit bc3530831885 · 2025-11-18T21:42:56.000-08:00
diff --git a/release_images_general.yml b/release_images_general.yml
@@ -58,24 +58,24 @@ release_images:
       public_registry: True
   5:
     framework: "vllm"
-    version: "0.11.0"
+    version: "0.11.1"
     arch_type: "x86"
     customer_type: "ec2"
     general:
       device_types: [ "gpu" ]
       python_versions: [ "py312" ]
       os_version: "ubuntu22.04"
-      cuda_version: "cu128"
+      cuda_version: "cu129"
       example: False
       disable_sm_tag: False
       force_release: False
       public_registry: True
       enable_soci: True
   6:
     framework: "vllm"
-    version: "0.10.2"
-    arch_type: "arm64"
-    customer_type: "ec2"
+    version: "0.11.1"
+    arch_type: "x86"
+    customer_type: "sagemaker"
     general:
       device_types: [ "gpu" ]
       python_versions: [ "py312" ]
@@ -88,14 +88,14 @@ release_images:
       enable_soci: True
   7:
     framework: "vllm"
-    version: "0.11.0"
-    arch_type: "x86"
-    customer_type: "sagemaker"
+    version: "0.10.2"
+    arch_type: "arm64"
+    customer_type: "ec2"
     general:
       device_types: [ "gpu" ]
       python_versions: [ "py312" ]
       os_version: "ubuntu22.04"
-      cuda_version: "cu128"
+      cuda_version: "cu129"
       example: False
       disable_sm_tag: False
       force_release: False
diff --git a/test/vllm/sagemaker/test_sm_endpoint.py b/test/vllm/sagemaker/test_sm_endpoint.py
@@ -56,6 +56,7 @@ def deploy_endpoint(name, image_uri, role, instance_type):
             instance_type=instance_type,
             initial_instance_count=1,
             endpoint_name=name,
+            inference_ami_version="al2-ami-sagemaker-inference-gpu-3-1",
             wait=True,
         )
         print("Endpoint deployment completed successfully")
diff --git a/vllm/buildspec-sm.yml b/vllm/buildspec-sm.yml
@@ -2,7 +2,7 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
 prod_account_id: &PROD_ACCOUNT_ID 763104351884
 region: &REGION <set-$REGION-in-environment>
 framework: &FRAMEWORK vllm
-version: &VERSION "0.11.0"
+version: &VERSION "0.11.1"
 short_version: &SHORT_VERSION "0.11"
 arch_type: &ARCH_TYPE x86_64
 autopatch_build: "False"
@@ -35,7 +35,7 @@ images:
       <<: *BUILD_CONTEXT
     image_size_baseline: 26000
     device_type: &DEVICE_TYPE gpu
-    cuda_version: &CUDA_VERSION cu128
+    cuda_version: &CUDA_VERSION cu129
     python_version: &DOCKER_PYTHON_VERSION py3
     tag_python_version: &TAG_PYTHON_VERSION py312
     os_version: &OS_VERSION ubuntu22.04
@@ -50,4 +50,4 @@ images:
         - sanity
         - security
         - sagemaker
-        - eks
+        # - eks
diff --git a/vllm/buildspec.yml b/vllm/buildspec.yml
@@ -2,7 +2,7 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
 prod_account_id: &PROD_ACCOUNT_ID 763104351884
 region: &REGION <set-$REGION-in-environment>
 framework: &FRAMEWORK vllm
-version: &VERSION "0.11.0"
+version: &VERSION "0.11.1"
 short_version: &SHORT_VERSION "0.11"
 arch_type: &ARCH_TYPE x86_64
 autopatch_build: "False"
@@ -35,7 +35,7 @@ images:
       <<: *BUILD_CONTEXT
     image_size_baseline: 26000
     device_type: &DEVICE_TYPE gpu
-    cuda_version: &CUDA_VERSION cu128
+    cuda_version: &CUDA_VERSION cu129
     python_version: &DOCKER_PYTHON_VERSION py3
     tag_python_version: &TAG_PYTHON_VERSION py312
     os_version: &OS_VERSION ubuntu22.04
@@ -49,19 +49,19 @@ images:
       test_platforms:
         - sanity
         - security
-        - ec2
-        - eks
-    tests:
-      - platform: ec2-multi-node-efa
-        params:
-          instance_type: p4d.24xlarge
-          node_count: 2
-        run:
-          - python test/v2/ec2/vllm/test_ec2.py
+        # - ec2
+        # - eks
+    # tests:
+    #   - platform: ec2-multi-node-efa
+    #     params:
+    #       instance_type: p4d.24xlarge
+    #       node_count: 2
+    #     run:
+    #       - python test/v2/ec2/vllm/test_ec2.py
         
-      # - platform: eks
-        params:
-          cluster: dlc-vllm
-          namespace: vllm
-        run:
-          - python test/v2/eks/vllm/vllm_eks_test.py
+    #   # - platform: eks
+    #     params:
+    #       cluster: dlc-vllm
+    #       namespace: vllm
+    #     run:
+    #       - python test/v2/eks/vllm/vllm_eks_test.py
diff --git a/vllm/x86_64/gpu/Dockerfile b/vllm/x86_64/gpu/Dockerfile
@@ -1,4 +1,4 @@
-FROM docker.io/vllm/vllm-openai:v0.11.0 as base
+FROM docker.io/vllm/vllm-openai:v0.11.1 as base
 ARG PYTHON="python3"
 LABEL maintainer="Amazon AI"
 ARG EFA_VERSION="1.43.3"

Original file line number	Diff line number	Diff line change
`@@ -56,6 +56,7 @@ def deploy_endpoint(name, image_uri, role, instance_type):`
`56`	`56`	`instance_type=instance_type,`
`57`	`57`	`initial_instance_count=1,`
`58`	`58`	`endpoint_name=name,`
	`59`	`+ inference_ami_version="al2-ami-sagemaker-inference-gpu-3-1",`
`59`	`60`	`wait=True,`
`60`	`61`	`)`
`61`	`62`	`print("Endpoint deployment completed successfully")`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-FROM docker.io/vllm/vllm-openai:v0.11.0 as base`
	`1`	`+FROM docker.io/vllm/vllm-openai:v0.11.1 as base`
`2`	`2`	`ARG PYTHON="python3"`
`3`	`3`	`LABEL maintainer="Amazon AI"`
`4`	`4`	`ARG EFA_VERSION="1.43.3"`