diff --git a/bitsandbytes/backends/cpu_xpu_common.py b/bitsandbytes/backends/cpu_xpu_common.py index 04755ed2d..c9ff7cb6f 100644 --- a/bitsandbytes/backends/cpu_xpu_common.py +++ b/bitsandbytes/backends/cpu_xpu_common.py @@ -484,7 +484,7 @@ def dequantize_4bit_impl( out_reshaped[n - rem :] = out_dq[n - rem :] * absmax[-1] # take transpose here because weight is transposed (again) for computation - return out.t() + return out # Do not need torch.compile here as we are calling torch/ipex kernel