Skip to content

Commit

Permalink
Reduce memory usage for NF4
Browse files — browse the repository at this point in the history
  • Loading branch information
Xia-Weiwen committed May 11, 2024
1 parent 85a01b0 commit 2c489f8
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions bitsandbytes/backends/cpu_xpu_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ def quantize_4bit_impl(
int(lowp_mode),
-1, # act_quant_mode. -1 means don't quant activation
)
state.absmax = torch.Tensor()
return torch.Tensor(), state

return out, state
Expand Down Expand Up @@ -444,6 +445,7 @@ def dequantize_4bit_impl(
assert quant_state.op_context is not None
A = quant_state.op_context.to_public(quant_state.op_context.get_weight())
A = A.reshape(-1)
absmax = quant_state.op_context.get_scales().reshape(-1)

if out is None:
out = torch.empty(quant_state.shape, dtype=quant_state.dtype, device=A.device)
Expand Down

0 comments on commit 2c489f8

Please sign in to comment.