From 5773b735a433a2477ec9dd85c001e60b57490b31 Mon Sep 17 00:00:00 2001 From: Yannick Schnider Date: Mon, 24 Mar 2025 16:08:20 +0100 Subject: [PATCH 1/8] fixing import for gptq functionalities Signed-off-by: Yannick Schnider --- vllm_spyre/model_executor/model_loader/spyre.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vllm_spyre/model_executor/model_loader/spyre.py b/vllm_spyre/model_executor/model_loader/spyre.py index de595be68..fb7edd0e1 100644 --- a/vllm_spyre/model_executor/model_loader/spyre.py +++ b/vllm_spyre/model_executor/model_loader/spyre.py @@ -127,13 +127,11 @@ def load_weights(self, model_config: ModelConfig, max_prompt_length: int, sys.path.append("/home/senuser/aiu-fms") if envs_spyre.VLLM_SPYRE_DYNAMO_BACKEND == "sendnn_decoder": - from aiu_as_addon import aiu_adapter, aiu_linear # noqa: F401 + from fms_mo.aiu_addons.gptq import gptq_aiu_adapter, gptq_aiu_linear # noqa: F401 linear_type = "gptq_aiu" - logger.info("Loaded `aiu_as_addon` functionalities") + logger.info("Loaded `aiu_addons` functionalities") else: - from cpu_addon import cpu_linear # noqa: F401 linear_type = "gptq_cpu" - logger.info("Loaded `cpu_addon` functionalities") quant_cfg = model_config._parse_quant_hf_config() From 8fa2c091a3ae73629c0e8ea59a09cdbade766156 Mon Sep 17 00:00:00 2001 From: Yannick Schnider Date: Mon, 24 Mar 2025 16:22:57 +0100 Subject: [PATCH 2/8] fix formatting Signed-off-by: Yannick Schnider --- vllm_spyre/model_executor/model_loader/spyre.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm_spyre/model_executor/model_loader/spyre.py b/vllm_spyre/model_executor/model_loader/spyre.py index fb7edd0e1..b70002f1c 100644 --- a/vllm_spyre/model_executor/model_loader/spyre.py +++ b/vllm_spyre/model_executor/model_loader/spyre.py @@ -127,7 +127,8 @@ def load_weights(self, model_config: ModelConfig, max_prompt_length: int, sys.path.append("/home/senuser/aiu-fms") if envs_spyre.VLLM_SPYRE_DYNAMO_BACKEND == "sendnn_decoder": - from fms_mo.aiu_addons.gptq import gptq_aiu_adapter, gptq_aiu_linear # noqa: F401 + from fms_mo.aiu_addons.gptq import ( # noqa: F401 + gptq_aiu_adapter, gptq_aiu_linear) linear_type = "gptq_aiu" logger.info("Loaded `aiu_addons` functionalities") else: From 36dfde71488d23bfa9a4cf33a7736c5598990e36 Mon Sep 17 00:00:00 2001 From: Yannick Schnider Date: Tue, 25 Mar 2025 13:49:39 +0100 Subject: [PATCH 3/8] warning gptq not working on CPU Signed-off-by: Yannick Schnider --- vllm_spyre/model_executor/model_loader/spyre.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm_spyre/model_executor/model_loader/spyre.py b/vllm_spyre/model_executor/model_loader/spyre.py index b70002f1c..ee353f63c 100644 --- a/vllm_spyre/model_executor/model_loader/spyre.py +++ b/vllm_spyre/model_executor/model_loader/spyre.py @@ -133,6 +133,7 @@ def load_weights(self, model_config: ModelConfig, max_prompt_length: int, logger.info("Loaded `aiu_addons` functionalities") else: linear_type = "gptq_cpu" + logger.warning("GPTQ is not expected to work on CPU.") quant_cfg = model_config._parse_quant_hf_config() From 204ec08f48761cf2f6a2c1d9c1004fd2a5c52fd0 Mon Sep 17 00:00:00 2001 From: Yannick Schnider Date: Wed, 26 Mar 2025 09:55:56 +0100 Subject: [PATCH 4/8] removing unused aiu-fms package Signed-off-by: Yannick Schnider --- vllm_spyre/model_executor/model_loader/spyre.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/vllm_spyre/model_executor/model_loader/spyre.py b/vllm_spyre/model_executor/model_loader/spyre.py index ee353f63c..240b43f36 100644 --- a/vllm_spyre/model_executor/model_loader/spyre.py +++ b/vllm_spyre/model_executor/model_loader/spyre.py @@ -121,11 +121,6 @@ def load_weights(self, model_config: ModelConfig, max_prompt_length: int, model_config.dtype, self.dtype) if model_config.quantization == "gptq": - - # note, we have to find a better way to package this - # shouldn't it be part of FMS? - sys.path.append("/home/senuser/aiu-fms") - if envs_spyre.VLLM_SPYRE_DYNAMO_BACKEND == "sendnn_decoder": from fms_mo.aiu_addons.gptq import ( # noqa: F401 gptq_aiu_adapter, gptq_aiu_linear) From 9fc8f8c2ff52f1f7ccb876daeca548672aac4b9c Mon Sep 17 00:00:00 2001 From: Yannick Schnider Date: Wed, 26 Mar 2025 09:57:41 +0100 Subject: [PATCH 5/8] format fixing Signed-off-by: Yannick Schnider --- vllm_spyre/model_executor/model_loader/spyre.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm_spyre/model_executor/model_loader/spyre.py b/vllm_spyre/model_executor/model_loader/spyre.py index 240b43f36..d5342749b 100644 --- a/vllm_spyre/model_executor/model_loader/spyre.py +++ b/vllm_spyre/model_executor/model_loader/spyre.py @@ -1,6 +1,5 @@ """Utilities for selecting and loading Spyre models.""" import os -import sys from typing import Optional import torch From 95432065ec84a4b88a083a720699f2fc8dffdf80 Mon Sep 17 00:00:00 2001 From: Yannick Schnider Date: Wed, 26 Mar 2025 13:37:57 +0100 Subject: [PATCH 6/8] adding fms-mo as dependency Signed-off-by: Yannick Schnider --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 55082fdc8..f4bee4448 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ # IBM foundation model stack ibm-fms==0.0.8 +fms-model-optimizer==0.2.0 wurlitzer From 4148d2da9ca1dbd647088d5bc984091f2938e964 Mon Sep 17 00:00:00 2001 From: Yannick Schnider Date: Wed, 26 Mar 2025 14:06:33 +0100 Subject: [PATCH 7/8] dont specify version Signed-off-by: Yannick Schnider --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f4bee4448..2730fd1bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # IBM foundation model stack ibm-fms==0.0.8 -fms-model-optimizer==0.2.0 +fms-model-optimizer wurlitzer From 0924af90705e2f260c1b2c3370294b7fb5de2c21 Mon Sep 17 00:00:00 2001 From: Yannick Schnider Date: Thu, 27 Mar 2025 16:41:48 +0100 Subject: [PATCH 8/8] removing fms-mo in requirements Signed-off-by: Yannick Schnider --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2730fd1bc..55082fdc8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ # IBM foundation model stack ibm-fms==0.0.8 -fms-model-optimizer wurlitzer