ARG CUDA_VERSION=12.4.1
ARG IMAGE_DISTRO=ubuntu22.04
ARG PYTHON_VERSION=3.12
# ---------- Builder Base ----------
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-${IMAGE_DISTRO} AS base
# Job scaling
ARG MAX_JOBS=32
ENV MAX_JOBS=${MAX_JOBS}
ARG NVCC_THREADS=2
ENV NVCC_THREADS=${NVCC_THREADS}
# Set arch lists for all targets
# 'a' suffix is not forward compatible but enables all optimizations
ARG TORCH_CUDA_ARCH_LIST="9.0a"
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
ARG VLLM_FA_CMAKE_GPU_ARCHES="90a-real"
ENV VLLM_FA_CMAKE_GPU_ARCHES=${VLLM_FA_CMAKE_GPU_ARCHES}
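# Defaults target Hopper (H100/H200); override at build time for other GPUs,
# e.g. --build-arg TORCH_CUDA_ARCH_LIST="8.0" --build-arg VLLM_FA_CMAKE_GPU_ARCHES="80-real" for A100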
# Update apt packages and install dependencies
ENV DEBIAN_FRONTEND=noninteractive
RUN apt update && \
    apt upgrade -y && \
    apt install -y --no-install-recommends \
        curl \
        gcc-12 g++-12 \
        git \
        libibverbs-dev \
        libjpeg-turbo8-dev \
        libpng-dev \
        zlib1g-dev
# Set compiler paths
ENV CC=/usr/bin/gcc-12
ENV CXX=/usr/bin/g++-12
# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR=/usr/local/bin sh
# Setup build workspace
WORKDIR /workspace
# Prep build venv
ARG PYTHON_VERSION
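# --seed preinstalls pip/setuptools/wheel in the venv; --python-preference only-managed
# has uv fetch its own CPython rather than using the system interpreter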
RUN uv venv -p ${PYTHON_VERSION} --seed --python-preference only-managed
ENV VIRTUAL_ENV=/workspace/.venv
ENV PATH=${VIRTUAL_ENV}/bin:${PATH}
ENV CUDA_HOME=/usr/local/cuda
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
FROM base AS build-base
RUN mkdir /wheels
# Install build deps that aren't in project requirements files
# Make sure to upgrade setuptools to avoid triton build bug
# cmake '4.x' isn't parsed right by some tools yet
RUN uv pip install -U build "cmake<4" ninja pybind11 "setuptools<=76" wheel
# Handle arm64 torch build
FROM build-base AS build-torch
ARG TARGETARCH
RUN if [ "${TARGETARCH}" = "arm64" ]; then \
        # Install NVPL for ARM64 \
        apt install -y --no-install-recommends nvpl0 && \
        # Persist the BLAS choice and ARM64 linker optimization to a file; \
        # a plain 'export' would not survive past this RUN layer. \
        printf '%s\n' \
            'export BLAS=NVPL' \
            'export CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000' \
            'export USE_PRIORITIZED_TEXT_FOR_LD=1' \
            > /workspace/build-env.sh; \
    else \
        uv pip install mkl-static mkl-include && \
        touch /workspace/build-env.sh; \
    fi
ARG TORCH_REF=v2.6.0
ARG TORCH_BUILD_VERSION=2.6.0+cu124
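# Pin the version the torch wheel reports instead of a git-derived dev version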
ENV PYTORCH_BUILD_VERSION=${TORCH_BUILD_VERSION:-${TORCH_REF#v}}
ENV PYTORCH_BUILD_NUMBER=0
RUN git clone https://github.com/pytorch/pytorch.git
RUN cd pytorch && \
git checkout ${TORCH_REF} && \
git submodule sync --recursive && \
git submodule update --init --recursive -j 8
# If needed, bump the XNNPACK submodule ref to fix a compilation bug:
#   cd third_party/XNNPACK && git checkout fcc06d1
RUN cd pytorch && \
    uv pip install -r requirements.txt && \
    # Pick up the arch-specific flags persisted above \
    . /workspace/build-env.sh && \
    uv build --wheel --no-build-isolation -o /wheels
FROM build-base AS build-audio
COPY --from=build-torch /wheels/*.whl wheels/
RUN uv pip install wheels/*
ARG AUDIO_REF=v2.6.0
ARG AUDIO_BUILD_VERSION=2.6.0+cu124
ENV BUILD_VERSION=${AUDIO_BUILD_VERSION:-${AUDIO_REF#v}}
RUN git clone https://github.com/pytorch/audio.git
RUN cd audio && \
git checkout ${AUDIO_REF} && \
git submodule sync --recursive && \
git submodule update --init --recursive -j 8
RUN cd audio && \
uv build --wheel --no-build-isolation -o /wheels
FROM build-base AS build-vision
COPY --from=build-torch /wheels/*.whl wheels/
RUN uv pip install wheels/*
ARG VISION_REF=v0.21.0
ARG VISION_BUILD_VERSION=0.21.0+cu124
ENV BUILD_VERSION=${VISION_BUILD_VERSION:-${VISION_REF#v}}
RUN git clone https://github.com/pytorch/vision.git
RUN cd vision && \
git checkout ${VISION_REF} && \
git submodule sync --recursive && \
git submodule update --init --recursive -j 8
RUN cd vision && \
uv build --wheel --no-build-isolation -o /wheels
FROM build-base AS build-triton
COPY --from=build-torch /wheels/*.whl wheels/
RUN uv pip install wheels/*
ARG TRITON_REF=release/3.2.x
ARG TRITON_BUILD_SUFFIX=+cu124
ENV TRITON_WHEEL_VERSION_SUFFIX=${TRITON_BUILD_SUFFIX:-}
RUN git clone https://github.com/triton-lang/triton.git
RUN cd triton && \
git checkout ${TRITON_REF} && \
git submodule sync --recursive && \
git submodule update --init --recursive -j 8
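# Triton's Python package lives in the python/ subdirectory, hence 'uv build python'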
RUN cd triton && \
uv build python --wheel --no-build-isolation -o /wheels
FROM build-base AS build-xformers
COPY --from=build-torch /wheels/*.whl wheels/
RUN uv pip install wheels/*
ARG XFORMERS_REF=v0.0.29.post2
ARG XFORMERS_BUILD_VERSION=0.0.29.post2+cu124
ENV BUILD_VERSION=${XFORMERS_BUILD_VERSION:-${XFORMERS_REF#v}}
RUN git clone https://github.com/facebookresearch/xformers.git
RUN cd xformers && \
git checkout ${XFORMERS_REF} && \
git submodule sync --recursive && \
git submodule update --init --recursive -j 8
RUN cd xformers && \
uv build --wheel --no-build-isolation -o /wheels
FROM build-base AS build-flashinfer
COPY --from=build-torch /wheels/*.whl wheels/
RUN uv pip install wheels/*
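# Build FlashInfer kernels ahead of time rather than JIT-compiling them at runtime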
ARG FLASHINFER_ENABLE_AOT=1
ARG FLASHINFER_REF=v0.2.2.post1
ARG FLASHINFER_BUILD_SUFFIX=cu124
ENV FLASHINFER_LOCAL_VERSION=${FLASHINFER_BUILD_SUFFIX:-}
RUN git clone https://github.com/flashinfer-ai/flashinfer.git
RUN cd flashinfer && \
git checkout ${FLASHINFER_REF} && \
git submodule sync --recursive && \
git submodule update --init --recursive -j 8
RUN cd flashinfer && \
uv build --wheel --no-build-isolation -o /wheels
FROM build-base AS build-vllm
COPY --from=build-torch /wheels/*.whl wheels/
RUN uv pip install wheels/*
ARG VLLM_REF=v0.8.4
ARG VLLM_BUILD_VERSION=0.8.4
ENV BUILD_VERSION=${VLLM_BUILD_VERSION:-${VLLM_REF#v}}
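# Keep setuptools-scm from deriving a version from git metadata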
ENV SETUPTOOLS_SCM_PRETEND_VERSION=${BUILD_VERSION:-:}
RUN git clone https://github.com/vllm-project/vllm.git
RUN cd vllm && \
git checkout ${VLLM_REF} && \
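    # use_existing_torch.py strips torch pins so the torch wheel built above is reused \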
python use_existing_torch.py && \
uv pip install -r requirements/build.txt && \
uv build --wheel --no-build-isolation -o /wheels
FROM base AS vllm-openai
COPY --from=build-torch /wheels/*.whl wheels/
COPY --from=build-audio /wheels/*.whl wheels/
COPY --from=build-vision /wheels/*.whl wheels/
COPY --from=build-flashinfer /wheels/*.whl wheels/
COPY --from=build-triton /wheels/*.whl wheels/
COPY --from=build-vllm /wheels/*.whl wheels/
COPY --from=build-xformers /wheels/*.whl wheels/
# Copy vllm examples directory
COPY --from=build-vllm /workspace/vllm/examples /workspace/examples/
# Install and cleanup wheels
RUN uv pip install wheels/* && \
    rm -rf wheels
# Install pynvml
RUN uv pip install pynvml
# Add additional packages for vLLM OpenAI
RUN uv pip install accelerate hf_transfer modelscope bitsandbytes timm boto3 "runai-model-streamer[s3]" tensorizer
# Clean uv cache
RUN uv cache clean
# Clean apt cache
RUN apt autoremove --purge -y && \
    apt clean && \
    rm -rf /var/lib/apt/lists/* /var/cache/apt/archives
# Enable hf-transfer
ENV HF_HUB_ENABLE_HF_TRANSFER=1
# API server entrypoint
ENTRYPOINT ["vllm", "serve"]
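# Example usage (image tag and model are placeholders; adjust to taste):
#   docker build -t vllm-custom .
#   docker run --gpus all -p 8000:8000 vllm-custom meta-llama/Llama-3.1-8B-Instruct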