Skip to content

Commit 7aa6982

Browse files
authored
Merge pull request #248 from ROCm/upstream_merge_24_10_28
Upstream merge 24 10 28
2 parents 5974cc3 + cfd7388 commit 7aa6982

File tree

279 files changed

+9696
-3186
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

279 files changed

+9696
-3186
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8 -b "auto" -l 1000 -f 5 -t 1
2+
model_name: "neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.356
8+
- name: "exact_match,flexible-extract"
9+
value: 0.358
10+
limit: 1000
11+
num_fewshot: 5

.buildkite/lm-eval-harness/configs/models-small.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Meta-Llama-3-8B-Instruct.yaml
22
Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
3-
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml
3+
Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
44
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
55
Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
66
Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml

.github/mergify.yml

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
pull_request_rules:
2+
- name: label-documentation
3+
description: Automatically apply documentation label
4+
conditions:
5+
- or:
6+
- files~=^[^/]+\.md$
7+
- files~=^docs/
8+
actions:
9+
label:
10+
add:
11+
- documentation
12+
13+
- name: label-ci-build
14+
description: Automatically apply ci/build label
15+
conditions:
16+
- files~=^\.github/
17+
- files~=\.buildkite/
18+
- files~=^cmake/
19+
- files=CMakeLists.txt
20+
- files~=^Dockerfile
21+
- files~=^requirements.*\.txt
22+
- files=setup.py
23+
actions:
24+
label:
25+
add:
26+
- ci/build
27+
28+
- name: label-frontend
29+
description: Automatically apply frontend label
30+
conditions:
31+
- files~=^vllm/entrypoints/
32+
actions:
33+
label:
34+
add:
35+
- frontend
36+
37+
- name: ping author on conflicts and add 'needs-rebase' label
38+
conditions:
39+
- conflict
40+
- -closed
41+
actions:
42+
label:
43+
add:
44+
- needs-rebase
45+
comment:
46+
message: |
47+
This pull request has merge conflicts that must be resolved before it can be
48+
merged. @{{author}} please rebase it. https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork
49+
50+
- name: remove 'needs-rebase' label when conflict is resolved
51+
conditions:
52+
- -conflict
53+
- -closed
54+
actions:
55+
label:
56+
remove:
57+
- needs-rebase

.github/workflows/stale.yml

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
name: 'Close inactive issues and PRs'
2+
3+
on:
4+
schedule:
5+
# Daily at 1:30 AM UTC
6+
- cron: '30 1 * * *'
7+
8+
jobs:
9+
close-issues-and-pull-requests:
10+
permissions:
11+
issues: write
12+
pull-requests: write
13+
actions: write
14+
runs-on: ubuntu-latest
15+
steps:
16+
- uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0
17+
with:
18+
# Increasing this value ensures that changes to this workflow
19+
# propagate to all issues and PRs in days rather than months
20+
operations-per-run: 1000
21+
22+
exempt-draft-pr: true
23+
exempt-issue-labels: 'keep-open'
24+
exempt-pr-labels: 'keep-open'
25+
26+
labels-to-add-when-unstale: 'unstale'
27+
labels-to-remove-when-stale: 'unstale'
28+
29+
days-before-issue-stale: 90
30+
days-before-issue-close: 30
31+
stale-issue-label: 'stale'
32+
stale-issue-message: >
33+
This issue has been automatically marked as stale because it has not
34+
had any activity within 90 days. It will be automatically closed if no
35+
further activity occurs within 30 days. Leave a comment if
36+
you feel this issue should remain open. Thank you!
37+
close-issue-message: >
38+
This issue has been automatically closed due to inactivity. Please
39+
feel free to reopen if you feel it is still relevant. Thank you!
40+
41+
days-before-pr-stale: 90
42+
days-before-pr-close: 30
43+
stale-pr-label: 'stale'
44+
stale-pr-message: >
45+
This pull request has been automatically marked as stale because it
46+
has not had any activity within 90 days. It will be automatically
47+
closed if no further activity occurs within 30 days. Leave a comment
48+
if you feel this pull request should remain open. Thank you!
49+
close-pr-message: >
50+
This pull request has been automatically closed due to inactivity.
51+
Please feel free to reopen if you intend to continue working on it.
52+
Thank you!

CMakeLists.txt

+11-9
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx11
4949
# requirements.txt files and should be kept consistent. The ROCm torch
5050
# versions are derived from Dockerfile.rocm
5151
#
52-
set(TORCH_SUPPORTED_VERSION_CUDA "2.4.0")
52+
set(TORCH_SUPPORTED_VERSION_CUDA "2.5.0")
5353
set(TORCH_SUPPORTED_VERSION_ROCM "2.5.0")
5454

5555
#
@@ -196,12 +196,12 @@ endif()
196196

197197
#
198198
# Use FetchContent for C++ dependencies that are compiled as part of vLLM's build process.
199-
# Configure it to place files in vllm/.deps, in order to play nicely with sccache.
199+
# setup.py will override FETCHCONTENT_BASE_DIR to play nicely with sccache.
200+
# Each dependency that produces build artifacts should override its BINARY_DIR to avoid
201+
# conflicts between build types. It should instead be set to ${CMAKE_BINARY_DIR}/<dependency>.
200202
#
201203
include(FetchContent)
202-
get_filename_component(PROJECT_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
203-
file(MAKE_DIRECTORY "${FETCHCONTENT_BASE_DIR}")
204-
set(FETCHCONTENT_BASE_DIR "${PROJECT_ROOT_DIR}/.deps")
204+
file(MAKE_DIRECTORY ${FETCHCONTENT_BASE_DIR}) # Ensure the directory exists
205205
message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")
206206

207207
#
@@ -229,7 +229,6 @@ set(VLLM_EXT_SRC
229229
"csrc/quantization/compressed_tensors/int8_quant_kernels.cu"
230230
"csrc/quantization/fp8/common.cu"
231231
"csrc/cuda_utils_kernels.cu"
232-
"csrc/moe_align_block_size_kernels.cu"
233232
"csrc/prepare_inputs/advance_step.cu"
234233
"csrc/torch_bindings.cpp")
235234

@@ -286,7 +285,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
286285
message(STATUS "Building Marlin kernels for archs: ${MARLIN_ARCHS}")
287286
else()
288287
message(STATUS "Not building Marlin kernels as no compatible archs found"
289-
"in CUDA target architectures")
288+
" in CUDA target architectures")
290289
endif()
291290

292291
#
@@ -444,6 +443,7 @@ target_compile_definitions(_C PRIVATE CUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1)
444443

445444
set(VLLM_MOE_EXT_SRC
446445
"csrc/moe/torch_bindings.cpp"
446+
"csrc/moe/moe_align_sum_kernels.cu"
447447
"csrc/moe/topk_softmax_kernels.cu")
448448

449449
set_gencode_flags_for_srcs(
@@ -471,7 +471,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
471471
message(STATUS "Building Marlin MOE kernels for archs: ${MARLIN_MOE_ARCHS}")
472472
else()
473473
message(STATUS "Not building Marlin MOE kernels as no compatible archs found"
474-
"in CUDA target architectures")
474+
" in CUDA target architectures")
475475
endif()
476476
endif()
477477

@@ -549,8 +549,10 @@ else()
549549
FetchContent_Declare(
550550
vllm-flash-attn
551551
GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
552-
GIT_TAG 013f0c4fc47e6574060879d9734c1df8c5c273bd
552+
GIT_TAG 5259c586c403a4e4d8bf69973c159b40cc346fb9
553553
GIT_PROGRESS TRUE
554+
# Don't share the vllm-flash-attn build between build types
555+
BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn
554556
)
555557
endif()
556558

Dockerfile.openvino

+4-4
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ RUN --mount=type=bind,source=.git,target=.git \
1515
if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
1616

1717
# install build requirements
18-
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/vllm/requirements-build.txt
18+
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/requirements-build.txt
1919
# build vLLM with OpenVINO backend
20-
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="openvino" python3 -m pip install /workspace/vllm/
20+
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="openvino" python3 -m pip install /workspace
2121

22-
COPY examples/ /workspace/vllm/examples
23-
COPY benchmarks/ /workspace/vllm/benchmarks
22+
COPY examples/ /workspace/examples
23+
COPY benchmarks/ /workspace/benchmarks
2424

2525
CMD ["/bin/bash"]

0 commit comments

Comments
 (0)