Skip to content

Commit bc35308

Browse files
authored
[v1] vLLM 0.11.1 (#5482)
1 parent 43eb702 commit bc35308

File tree

5 files changed

+31
-30
lines changed

5 files changed

+31
-30
lines changed

release_images_general.yml

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -58,24 +58,24 @@ release_images:
5858
public_registry: True
5959
5:
6060
framework: "vllm"
61-
version: "0.11.0"
61+
version: "0.11.1"
6262
arch_type: "x86"
6363
customer_type: "ec2"
6464
general:
6565
device_types: [ "gpu" ]
6666
python_versions: [ "py312" ]
6767
os_version: "ubuntu22.04"
68-
cuda_version: "cu128"
68+
cuda_version: "cu129"
6969
example: False
7070
disable_sm_tag: False
7171
force_release: False
7272
public_registry: True
7373
enable_soci: True
7474
6:
7575
framework: "vllm"
76-
version: "0.10.2"
77-
arch_type: "arm64"
78-
customer_type: "ec2"
76+
version: "0.11.1"
77+
arch_type: "x86"
78+
customer_type: "sagemaker"
7979
general:
8080
device_types: [ "gpu" ]
8181
python_versions: [ "py312" ]
@@ -88,14 +88,14 @@ release_images:
8888
enable_soci: True
8989
7:
9090
framework: "vllm"
91-
version: "0.11.0"
92-
arch_type: "x86"
93-
customer_type: "sagemaker"
91+
version: "0.10.2"
92+
arch_type: "arm64"
93+
customer_type: "ec2"
9494
general:
9595
device_types: [ "gpu" ]
9696
python_versions: [ "py312" ]
9797
os_version: "ubuntu22.04"
98-
cuda_version: "cu128"
98+
cuda_version: "cu129"
9999
example: False
100100
disable_sm_tag: False
101101
force_release: False

test/vllm/sagemaker/test_sm_endpoint.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,7 @@ def deploy_endpoint(name, image_uri, role, instance_type):
5656
instance_type=instance_type,
5757
initial_instance_count=1,
5858
endpoint_name=name,
59+
inference_ami_version="al2-ami-sagemaker-inference-gpu-3-1",
5960
wait=True,
6061
)
6162
print("Endpoint deployment completed successfully")

vllm/buildspec-sm.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
22
prod_account_id: &PROD_ACCOUNT_ID 763104351884
33
region: &REGION <set-$REGION-in-environment>
44
framework: &FRAMEWORK vllm
5-
version: &VERSION "0.11.0"
5+
version: &VERSION "0.11.1"
66
short_version: &SHORT_VERSION "0.11"
77
arch_type: &ARCH_TYPE x86_64
88
autopatch_build: "False"
@@ -35,7 +35,7 @@ images:
3535
<<: *BUILD_CONTEXT
3636
image_size_baseline: 26000
3737
device_type: &DEVICE_TYPE gpu
38-
cuda_version: &CUDA_VERSION cu128
38+
cuda_version: &CUDA_VERSION cu129
3939
python_version: &DOCKER_PYTHON_VERSION py3
4040
tag_python_version: &TAG_PYTHON_VERSION py312
4141
os_version: &OS_VERSION ubuntu22.04
@@ -50,4 +50,4 @@ images:
5050
- sanity
5151
- security
5252
- sagemaker
53-
- eks
53+
# - eks

vllm/buildspec.yml

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
22
prod_account_id: &PROD_ACCOUNT_ID 763104351884
33
region: &REGION <set-$REGION-in-environment>
44
framework: &FRAMEWORK vllm
5-
version: &VERSION "0.11.0"
5+
version: &VERSION "0.11.1"
66
short_version: &SHORT_VERSION "0.11"
77
arch_type: &ARCH_TYPE x86_64
88
autopatch_build: "False"
@@ -35,7 +35,7 @@ images:
3535
<<: *BUILD_CONTEXT
3636
image_size_baseline: 26000
3737
device_type: &DEVICE_TYPE gpu
38-
cuda_version: &CUDA_VERSION cu128
38+
cuda_version: &CUDA_VERSION cu129
3939
python_version: &DOCKER_PYTHON_VERSION py3
4040
tag_python_version: &TAG_PYTHON_VERSION py312
4141
os_version: &OS_VERSION ubuntu22.04
@@ -49,19 +49,19 @@ images:
4949
test_platforms:
5050
- sanity
5151
- security
52-
- ec2
53-
- eks
54-
tests:
55-
- platform: ec2-multi-node-efa
56-
params:
57-
instance_type: p4d.24xlarge
58-
node_count: 2
59-
run:
60-
- python test/v2/ec2/vllm/test_ec2.py
52+
# - ec2
53+
# - eks
54+
# tests:
55+
# - platform: ec2-multi-node-efa
56+
# params:
57+
# instance_type: p4d.24xlarge
58+
# node_count: 2
59+
# run:
60+
# - python test/v2/ec2/vllm/test_ec2.py
6161

62-
# - platform: eks
63-
params:
64-
cluster: dlc-vllm
65-
namespace: vllm
66-
run:
67-
- python test/v2/eks/vllm/vllm_eks_test.py
62+
# # - platform: eks
63+
# params:
64+
# cluster: dlc-vllm
65+
# namespace: vllm
66+
# run:
67+
# - python test/v2/eks/vllm/vllm_eks_test.py

vllm/x86_64/gpu/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
FROM docker.io/vllm/vllm-openai:v0.11.0 as base
1+
FROM docker.io/vllm/vllm-openai:v0.11.1 as base
22
ARG PYTHON="python3"
33
LABEL maintainer="Amazon AI"
44
ARG EFA_VERSION="1.43.3"

0 commit comments

Comments
 (0)