diff --git a/build.py b/build.py index d02cf14200..c5dc3b3920 100755 --- a/build.py +++ b/build.py @@ -78,7 +78,7 @@ "ort_openvino_version": "2025.3.0", "standalone_openvino_version": "2025.3.0", "dcgm_version": "4.4.0-1", - "vllm_version": "0.10.1.1", + "vllm_version": "0.10.2", "rhel_py_version": "3.12.3", } @@ -660,6 +660,10 @@ def pytorch_cmake_args(images): cargs.append( cmake_backend_enable("pytorch", "TRITON_ENABLE_NVTX", FLAGS.enable_nvtx) ) + if target_platform() == "igpu": + cargs.append( + cmake_backend_enable("pytorch", "TRITON_PYTORCH_NVSHMEM", False) + ) return cargs @@ -1514,6 +1518,22 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach ENV PYTHONPATH=/opt/tritonserver/backends/dali/wheel/dali:$PYTHONPATH """ + if target_platform() not in ["igpu", "windows", "rhel"]: + repo_arch = "sbsa" if target_machine == "aarch64" else "x86_64" + df += f""" +RUN curl -o /tmp/cuda-keyring.deb \\ + https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/{repo_arch}/cuda-keyring_1.1-1_all.deb \\ + && apt install /tmp/cuda-keyring.deb \\ + && rm /tmp/cuda-keyring.deb \\ + && apt update -qq \\ + && apt install --yes --no-install-recommends libnvshmem3-cuda-13 \\ + && rm -rf /var/lib/apt/lists/* \\ + && dpkg -L libnvshmem3-cuda-13 | grep libnvshmem_host.so | sed -e 's/libnvshmem_host.*//g' | sort -u > /etc/ld.so.conf.d/libnvshmem3-cuda-13.conf \\ + && ldconfig +""".format( + repo_arch=repo_arch + ) + df += """ WORKDIR /opt/tritonserver RUN rm -fr /opt/tritonserver/*