From 5efc7fa491fab40a1957061ccb1061f8b654094a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Olejniczak?= Date: Fri, 28 Nov 2025 10:03:49 +0200 Subject: [PATCH 1/3] Add get_device_total_memory method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Olejniczak --- vllm_gaudi/platform.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vllm_gaudi/platform.py b/vllm_gaudi/platform.py index d05dce253..a6b359762 100644 --- a/vllm_gaudi/platform.py +++ b/vllm_gaudi/platform.py @@ -82,6 +82,12 @@ def set_device(cls, device: torch.device) -> None: def get_device_name(cls, device_id: int = 0) -> str: return cls.device_name + @classmethod + def get_device_total_memory(cls, device_id: int = 0) -> int: + """Get the total memory of a device in bytes.""" + total_hpu_memory = torch.hpu.mem_get_info()[1] + return total_hpu_memory + @classmethod def check_and_update_config(cls, vllm_config: VllmConfig) -> None: parallel_config = vllm_config.parallel_config From b9508262714c77a2961c8cc056fb208a9e1036a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Olejniczak?= Date: Fri, 28 Nov 2025 17:30:38 +0200 Subject: [PATCH 2/3] Add workaround for get_device_total_memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Olejniczak --- vllm_gaudi/platform.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/vllm_gaudi/platform.py b/vllm_gaudi/platform.py index a6b359762..00607b3d9 100644 --- a/vllm_gaudi/platform.py +++ b/vllm_gaudi/platform.py @@ -85,7 +85,17 @@ def get_device_name(cls, device_id: int = 0) -> str: @classmethod def get_device_total_memory(cls, device_id: int = 0) -> int: """Get the total memory of a device in bytes.""" - total_hpu_memory = torch.hpu.mem_get_info()[1] + # NOTE: This is a workaround. + # The correct implementation of the method in this place should look as follows: + # total_hpu_memory = torch.hpu.mem_get_info()[1] + # A value of 0 is returned to preserve the current logic in + # vllm/vllm/engine/arg_utils.py → get_batch_defaults() → + # default_max_num_batched_tokens, in order to avoid the + # error in hpu_perf_test, while also preventing a + # NotImplementedError in test_defaults_with_usage_context. + + total_hpu_memory = 0 + return total_hpu_memory @classmethod From cd5662942e6e43e4cae3ab192b1e706c82ef74e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Olejniczak?= Date: Thu, 4 Dec 2025 00:24:08 +0200 Subject: [PATCH 3/3] Add get_device_total_memory warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Olejniczak --- vllm_gaudi/platform.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm_gaudi/platform.py b/vllm_gaudi/platform.py index 00607b3d9..0f12ce30b 100644 --- a/vllm_gaudi/platform.py +++ b/vllm_gaudi/platform.py @@ -93,6 +93,8 @@ def get_device_total_memory(cls, device_id: int = 0) -> int: # default_max_num_batched_tokens, in order to avoid the # error in hpu_perf_test, while also preventing a # NotImplementedError in test_defaults_with_usage_context. + logger.warning("This is a workaround! Please check the NOTE " + "in the get_device_total_memory definition.") total_hpu_memory = 0