Changes from 5 commits
tests/models/quantization/test_bitsandbytes.py (5 changes: 3 additions & 2 deletions)
@@ -10,13 +10,14 @@
 from tests.quantization.utils import is_quant_method_supported
 from vllm.platforms import current_platform
+from vllm.platforms.rocm import on_gfx9
Collaborator:

@sstamenk we should avoid importing a platform-specific variable without a current_platform.is_rocm() guard.
sstamenk (Author):

Changed, thanks.


 from ...utils import compare_two_settings, multi_gpu_test
 from ..utils import check_embeddings_close, check_logprobs_close

 pytestmark = pytest.mark.skipif(
tjtanaa (Collaborator), Nov 17, 2025:

@sstamenk let's do this instead:

if current_platform.is_rocm():
    from vllm.platforms.rocm import on_gfx9
    pytestmark = pytest.mark.skipif(
        on_gfx9(),
        reason="bitsandbytes quantization not supported on gfx9 (warp size 64 limitation)",
    )

sstamenk (Author):

Applied suggested changes, thanks.

-    current_platform.is_rocm(),
-    reason="bitsandbytes quantization not supported on ROCm (CUDA-only kernels)",
+    current_platform.is_rocm() and on_gfx9(),
+    reason="bitsandbytes quantization not supported on gfx9 (warp size 64 limitation)",
 )

 models_4bit_to_test = [
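For readers following the thread: a minimal sketch of how the top of the test module reads once tjtanaa's suggestion is applied (an illustration assuming the suggestion was taken verbatim, not a copy of the merged file). pytest treats a module-level pytestmark as optional, so on non-ROCm platforms the mark is simply never set and the tests run as usual:

import pytest

from vllm.platforms import current_platform

# The ROCm-only helper is imported behind the platform guard, so
# non-ROCm builds never touch vllm.platforms.rocm.
if current_platform.is_rocm():
    from vllm.platforms.rocm import on_gfx9

    # Module-level skip: applies to every test collected from this file.
    pytestmark = pytest.mark.skipif(
        on_gfx9(),
        reason="bitsandbytes quantization not supported on gfx9 (warp size 64 limitation)",
    )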
vllm/platforms/rocm.py (3 changes: 3 additions & 0 deletions)
@@ -185,6 +185,9 @@ class RocmPlatform(Platform):
         "petit_nvfp4",
         "torchao",
     ]
+    # bitsandbytes quantization not supported on gfx9 (warp size 64 limitation)
+    if not on_gfx9():
+        supported_quantization += ["bitsandbytes"]

     @classmethod
     def get_vit_attn_backend(
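As background on the helper used above: a hypothetical sketch of how a gfx9 check can be written on ROCm builds of PyTorch (the name on_gfx9_sketch and the exact architecture prefixes are assumptions for illustration; the real on_gfx9() in vllm/platforms/rocm.py may be implemented differently). GFX9 GPUs such as MI100/MI200/MI300 use a wavefront size of 64, which is the limitation cited in the skip reason:

import torch

def on_gfx9_sketch() -> bool:
    # Hypothetical helper, not vllm's implementation. On ROCm builds of
    # PyTorch, gcnArchName reports strings such as "gfx90a:sramecc+:xnack-"
    # (MI200) or "gfx942" (MI300).
    arch = torch.cuda.get_device_properties(0).gcnArchName
    return arch.startswith(("gfx908", "gfx90a", "gfx94"))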