Skip to content

Commit

Permalink
fix device
Browse files Browse the repository at this point in the history
Signed-off-by: ZX-ModelCloud <[email protected]>
  • Loading branch information
ZX-ModelCloud committed Feb 28, 2025
1 parent 32a468d commit e590454
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion tests/test_quant_and_eora_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from peft import PeftModel, LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

from peft.tuners.lora.gptq import GPTQLoraLinear

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# -- end do not touch

Expand Down Expand Up @@ -141,7 +143,7 @@ def test_quant_and_eora(self):

def bench(self, path: str, backend: BACKEND, adapter: Optional[Lora]):
# test post-quant inference
-        model = AutoModelForCausalLM.from_pretrained(path)
+        model = AutoModelForCausalLM.from_pretrained(path, device_map="cuda")
print("model", model)
if adapter:
log.info("PEFT: converting model to lora model")
Expand All @@ -151,6 +153,8 @@ def bench(self, path: str, backend: BACKEND, adapter: Optional[Lora]):
model.load_adapter(adapter.path)
print("peft model", model)

assert isinstance(model.model.layers[0].self_attn.v_proj, GPTQLoraLinear)

tokenizer = AutoTokenizer.from_pretrained(path)
inp = tokenizer("Capital of France is", return_tensors="pt").to(model.device)
tokens = model.generate(**inp)[0]
Expand Down

0 comments on commit e590454

Please sign in to comment.