diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6d14162d0..b81bdeaa0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,13 +3,23 @@ ci: autoupdate_schedule: quarterly repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-json + - id: check-yaml + - id: debug-statements + - id: mixed-line-ending + args: [--fix=lf] + - repo: https://github.com/Lucas-C/pre-commit-hooks rev: v1.5.5 hooks: - id: insert-license files: | (?x)^( - auto_round/.*(py|yaml|yml|sh) + auto_round/.*(py|yaml|yml|sh)| + auto_round_extension/.*(py|yaml|yml|sh) )$ args: [ @@ -26,7 +36,14 @@ repos: args: [-w] additional_dependencies: - tomli - exclude: | - (?x)^( - examples/.*(txt|patch) - )$ + + - repo: https://github.com/pycqa/isort + rev: 6.0.1 + hooks: + - id: isort + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.12.4 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix, --no-cache] diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 1a8c885d7..003035922 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -13,6 +13,7 @@ # limitations under the License. import sys + def run_eval(): from auto_round.script.llm import setup_eval_parser args = setup_eval_parser() @@ -58,7 +59,7 @@ def run_fast(): def run_mllm(): if "--eval" in sys.argv: - from auto_round.script.llm import setup_eval_parser, eval + from auto_round.script.llm import eval, setup_eval_parser sys.argv.remove("--eval") args = setup_eval_parser() args.mllm = True @@ -76,7 +77,7 @@ def run_mllm(): def run_lmms(): # from auto_round.script.lmms_eval import setup_lmms_args, eval - from auto_round.script.mllm import setup_lmms_parser, lmms_eval + from auto_round.script.mllm import lmms_eval, setup_lmms_parser args = setup_lmms_parser() lmms_eval(args) diff --git a/auto_round/autoround.py b/auto_round/autoround.py index 6a11b075f..056308f41 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -12,54 +12,62 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import copy import os import re import sys +import time +from typing import Any, Union +import accelerate import torch -import copy -import time -from typing import Union, Any -from transformers import set_seed from torch import autocast from tqdm import tqdm -import accelerate +from transformers import set_seed +from auto_round.data_type import QUANT_FUNC_WITH_DTYPE from auto_round.export.export_to_gguf.config import GGUF_CONFIG, GGUF_INNER_CONFIG, ModelType -from auto_round.wrapper import WrapperMultiblock, wrapper_block, unwrapper_block, WrapperLinear, unwrapper_layer +from auto_round.low_cpu_mem.utils import get_layers_before_block from auto_round.utils import ( + SUPPORTED_DTYPES, + SUPPORTED_LAYER_TYPES, + TORCH_VERSION_AT_LEAST_2_6, CpuInfo, + _gguf_args_check, block_forward, check_is_cpu, + check_seqlen_compatible, + check_skippable_keywords, check_to_quantized, + clear_memory, collect_best_params, + compile_func, convert_dtype_str2torch, detect_device, + find_matching_blocks, + flatten_list, get_block_names, + get_layer_config_by_gguf_format, + get_layer_features, + get_layer_names_in_block, + get_lm_head_name, get_module, + get_shared_keys, htcore, + infer_bits_by_data_type, + init_cache, + is_debug_mode, is_optimum_habana_available, + llm_load_model, logger, - to_device, - to_dtype, - get_layer_names_in_block, mv_module_from_gpu, - unsupport_meta_device, clear_memory, - compile_func, - find_matching_blocks, is_debug_mode, - TORCH_VERSION_AT_LEAST_2_6, - SUPPORTED_LAYER_TYPES, - get_layer_features, - set_module, - llm_load_model, reset_params, - init_cache, check_skippable_keywords, get_shared_keys, SUPPORTED_DTYPES, infer_bits_by_data_type, - _gguf_args_check, - check_seqlen_compatible, - get_layer_config_by_gguf_format, get_lm_head_name, flatten_list + set_module, + to_device, + to_dtype, + unsupport_meta_device, ) -from auto_round.data_type import QUANT_FUNC_WITH_DTYPE -from auto_round.low_cpu_mem.utils import get_layers_before_block +from auto_round.wrapper import WrapperLinear, WrapperMultiblock, unwrapper_block, unwrapper_layer, wrapper_block class AutoRound(object): @@ -232,9 +240,9 @@ def __init__( self.cache_device = torch.device("cpu") if self.low_gpu_mem_usage else self.device ##activation - self.act_group_size = act_group_size if not (act_group_size is None) else self.group_size - self.act_bits = act_bits if not (act_bits is None) else self.bits - self.act_sym = act_sym if not (act_sym is None) else self.sym + self.act_group_size = act_group_size if act_group_size is not None else self.group_size + self.act_bits = act_bits if act_bits is not None else self.bits + self.act_sym = act_sym if act_sym is not None else self.sym self.act_dynamic = act_dynamic self.act_data_type = act_data_type if self.act_data_type is None: @@ -272,7 +280,7 @@ def __init__( self.enable_torch_compile = enable_torch_compile if not self.enable_torch_compile and TORCH_VERSION_AT_LEAST_2_6 and self.act_bits > 8 and not is_debug_mode() \ - and self.low_cpu_mem_usage != True and "fp8" not in self.data_type and "fp8" not in self.act_data_type: + and not self.low_cpu_mem_usage and "fp8" not in self.data_type and "fp8" not in self.act_data_type: logger.info("'enable_torch_compile' is set to `False` by default. 
" \ "Enabling it can reduce tuning cost by 20%, but it might throw an exception.") @@ -280,7 +288,7 @@ def __init__( self.enable_torch_compile = False logger.warning("reset enable_torch_compile to `False` as activation quantization is enabled") - if self.low_cpu_mem_usage == True and self.enable_torch_compile: + if self.low_cpu_mem_usage and self.enable_torch_compile: self.enable_torch_compile = False logger.warning("reset enable_torch_compile to `False` as low_cpu_mem_usage is enabled") @@ -535,7 +543,7 @@ def parse_format_to_list(self, format: str) -> list: only_gguf = False if only_gguf: self.scale_dtype = torch.float32 - logger.info(f"change `scale_dtype` to `torch.float32`") + logger.info("change `scale_dtype` to `torch.float32`") # Adjust format settings based on compatibility for index in range(len(formats)): @@ -592,7 +600,7 @@ def _check_supported_format(self, format: str) -> bool: f" group_size={self.group_size}, sym={self.sym}, act_bits={self.act_bits}" elif format != "fake": logger.warning( - f"Currently only support to export auto_round format quantized model" + "Currently only support to export auto_round format quantized model" " with fp8 dtype activation for activation quantization." " Change format to fake and save." ) @@ -1320,7 +1328,7 @@ def quantize(self): keys = inputs.keys() input_id_str = [key for key in keys if key.startswith('hidden_state')] if len(input_id_str) != 1: - raise RuntimeError(f"hidden_states arg mismatch error," + raise RuntimeError("hidden_states arg mismatch error," "please raise an issue in https://github.com/intel/auto-round/issues") inputs["input_ids"] = inputs.pop(input_id_str[0], None) if q_inputs is not None: @@ -1853,10 +1861,10 @@ def forward(m, hidden_states=None, *positional_inputs, **kwargs): self.batch_dim = 1 if len(hidden_states.shape) > 1 and hidden_states.shape[1] > self.batch_size: logger.error( - f"this model has not been supported, " - f"please raise an issue in https://github.com/intel/auto-round/issues" - f" or try to set the `batch_size` to 1 and " - f"`gradient_accumulate_steps` to your current batch size.") + "this model has not been supported, " + "please raise an issue in https://github.com/intel/auto-round/issues" + " or try to set the `batch_size` to 1 and " + "`gradient_accumulate_steps` to your current batch size.") exit(-1) if hidden_states is not None: @@ -2094,8 +2102,7 @@ def get_act_max_hook(module, input, output): hook_handles = [] for n, m in model.named_modules(): - # for block - if hasattr(m, "act_dynamic") and m.act_dynamic == False and check_to_quantized(m): + if hasattr(m, "act_dynamic") and not m.act_dynamic and check_to_quantized(m): hook = m.register_forward_hook(get_act_max_hook) hook_handles.append(hook) continue @@ -2405,7 +2412,7 @@ def quant_blocks( model_type=model_type) else: PACKING_LAYER_WITH_FORMAT[target_backend](tmp_m.tmp_name, self.model, self.formats[0]) - pbar.set_description(f"Quantizing done") + pbar.set_description("Quantizing done") pbar.update(1) pbar.close() diff --git a/auto_round/calib_dataset.py b/auto_round/calib_dataset.py index 6fdd2b32b..de4eba825 100644 --- a/auto_round/calib_dataset.py +++ b/auto_round/calib_dataset.py @@ -14,12 +14,12 @@ import json import random +import sys import torch -from datasets import Dataset, IterableDataset, load_dataset, concatenate_datasets -from datasets import Features, Sequence, Value +from datasets import Dataset, Features, IterableDataset, Sequence, Value, concatenate_datasets, load_dataset from torch.utils.data import DataLoader -import sys 
+ from .utils import is_local_path, logger CALIB_DATASETS = {} @@ -136,7 +136,7 @@ def get_pile_dataset(tokenizer, seqlen, dataset_name="NeelNanda/pile-10k", split error_message = str(e) # Check for proxy or SSL error if "proxy" in error_message.lower() or isinstance(e, ssl.SSLError) or "SSL" in error_message.upper(): - logger.error(f"Network error detected, please checking proxy settings." \ + logger.error("Network error detected, please check proxy settings." \ "Error: {error_message}. Or consider using a backup dataset by `pip install modelscope`" \ " and set '--dataset swift/pile-val-backup' in AutoRound API.") else: diff --git a/auto_round/data_type/fp8.py b/auto_round/data_type/fp8.py index 70873e6ed..6253d662e 100644 --- a/auto_round/data_type/fp8.py +++ b/auto_round/data_type/fp8.py @@ -13,9 +13,15 @@ # limitations under the License. import torch -from auto_round.data_type.utils import get_gaudi_fp8_ste_func, float8_e4m3fn_ste, reshape_pad_tensor_by_group_size, \ - revert_tensor_by_pad, float8_e5m2_ste + from auto_round.data_type.register import register_dtype +from auto_round.data_type.utils import ( + float8_e4m3fn_ste, + float8_e5m2_ste, + get_gaudi_fp8_ste_func, + reshape_pad_tensor_by_group_size, + revert_tensor_by_pad, +) @register_dtype(("fp8_sym","fp8","fp8_e4m3")) diff --git a/auto_round/data_type/gguf.py b/auto_round/data_type/gguf.py index eba6db810..0d094f41d 100644 --- a/auto_round/data_type/gguf.py +++ b/auto_round/data_type/gguf.py @@ -13,8 +13,9 @@ # limitations under the License. import torch -from auto_round.data_type.utils import round_ste, reshape_pad_tensor_by_group_size, revert_tensor_by_pad, logger + from auto_round.data_type.register import register_dtype +from auto_round.data_type.utils import logger, reshape_pad_tensor_by_group_size, revert_tensor_by_pad, round_ste from auto_round.utils import get_reciprocal @@ -506,7 +507,7 @@ def quant_tensor_gguf_sym_dq( Returns: Quantized and de-quantized tensor, scale, zero-point """ - from auto_round.export.export_to_gguf.config import QK_K, K_SCALE_SIZE, GGML_QUANT_SIZES + from auto_round.export.export_to_gguf.config import GGML_QUANT_SIZES, K_SCALE_SIZE, QK_K from auto_round.export.export_to_gguf.packing import make_q3_quants, make_qx_quants if bits not in [3, 6]: diff --git a/auto_round/data_type/int.py b/auto_round/data_type/int.py index 5367b6925..be5d7cf33 100644 --- a/auto_round/data_type/int.py +++ b/auto_round/data_type/int.py @@ -13,8 +13,9 @@ # limitations under the License. import torch -from auto_round.data_type.utils import round_ste, reshape_pad_tensor_by_group_size, revert_tensor_by_pad + from auto_round.data_type.register import register_dtype +from auto_round.data_type.utils import reshape_pad_tensor_by_group_size, revert_tensor_by_pad, round_ste @register_dtype("int_sym") diff --git a/auto_round/data_type/mxfp.py b/auto_round/data_type/mxfp.py index af115311d..f5ad2aa08 100644 --- a/auto_round/data_type/mxfp.py +++ b/auto_round/data_type/mxfp.py @@ -13,8 +13,9 @@ # limitations under the License. 
import torch -from auto_round.data_type.utils import floor_ste, round_ste, reshape_pad_tensor_by_group_size, revert_tensor_by_pad -from auto_round.data_type.register import register_dtype, QUANT_FUNC_WITH_DTYPE + +from auto_round.data_type.register import QUANT_FUNC_WITH_DTYPE, register_dtype +from auto_round.data_type.utils import floor_ste, reshape_pad_tensor_by_group_size, revert_tensor_by_pad, round_ste MXFP_FORMAT_CACHE = { # data type: ebits, mbits, emax, max_norm, min_norm diff --git a/auto_round/data_type/nvfp.py b/auto_round/data_type/nvfp.py index d1293e899..147264c68 100644 --- a/auto_round/data_type/nvfp.py +++ b/auto_round/data_type/nvfp.py @@ -16,7 +16,7 @@ from auto_round.data_type.fp8 import float8_e4m3fn_ste from auto_round.data_type.register import register_dtype -from auto_round.data_type.utils import reshape_pad_tensor_by_group_size, revert_tensor_by_pad, logger +from auto_round.data_type.utils import logger, reshape_pad_tensor_by_group_size, revert_tensor_by_pad # taken from diff --git a/auto_round/data_type/utils.py b/auto_round/data_type/utils.py index 5265d55fc..748417038 100644 --- a/auto_round/data_type/utils.py +++ b/auto_round/data_type/utils.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +from functools import lru_cache + import torch + from auto_round.data_type.register import QUANT_FUNC_WITH_DTYPE -from functools import lru_cache from auto_round.utils import logger + def reshape_pad_tensor_by_group_size(data: torch.Tensor, group_size: int): """Reshapes and pads the tensor to ensure that it can be quantized in groups of `group_size`. diff --git a/auto_round/data_type/w4fp8.py b/auto_round/data_type/w4fp8.py index 9b5123759..1dd550b22 100644 --- a/auto_round/data_type/w4fp8.py +++ b/auto_round/data_type/w4fp8.py @@ -15,8 +15,7 @@ import torch from auto_round.data_type.register import register_dtype -from auto_round.data_type.utils import get_gaudi_fp8_ste_func, float8_e4m3fn_ste - +from auto_round.data_type.utils import float8_e4m3fn_ste, get_gaudi_fp8_ste_func # @register_dtype("fp8_gaudi3_to_int_sym") # def progressive_quant_fp8_int4_gaudi3( diff --git a/auto_round/eval/evaluation.py b/auto_round/eval/evaluation.py index 5536c48c4..293379bab 100644 --- a/auto_round/eval/evaluation.py +++ b/auto_round/eval/evaluation.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from typing import Optional, Union from lm_eval import simple_evaluate as lm_simple_evaluate -import os os.environ["TOKENIZERS_PARALLELISM"] = "false" diff --git a/auto_round/export/export_to_autogptq/export.py b/auto_round/export/export_to_autogptq/export.py index 34dd7aea0..d74040419 100644 --- a/auto_round/export/export_to_autogptq/export.py +++ b/auto_round/export/export_to_autogptq/export.py @@ -12,6 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy +import inspect +import json +import os +from concurrent.futures import ThreadPoolExecutor + +import threadpoolctl as tctl + # MIT License # # Copyright (c) 2023 潘其威(William) @@ -34,22 +42,21 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
import torch - -import auto_round.export.export_to_autogptq.qlinear_triton -from auto_round.utils import check_to_quantized, get_block_names, \ - get_module, logger, set_module, SUPPORTED_LAYER_TYPES, filter_quantization_config -import copy -import json -import os - import torch.nn as nn import transformers - -import threadpoolctl as tctl -import inspect from tqdm import tqdm -from concurrent.futures import ThreadPoolExecutor -from auto_round.utils import get_autogptq_packing_qlinear + +import auto_round.export.export_to_autogptq.qlinear_triton +from auto_round.utils import ( + SUPPORTED_LAYER_TYPES, + check_to_quantized, + filter_quantization_config, + get_autogptq_packing_qlinear, + get_block_names, + get_module, + logger, + set_module, +) BLOCK_PATTERNS = [ ## copy from transformers optimum "transformer.h", @@ -142,7 +149,7 @@ def save_quantized_as_autogptq(output_dir, inplace=True, backend="auto_gptq:exll flattened_list = [item for sublist in all_blocks for item in sublist] common_prefix = os.path.commonprefix(flattened_list).rstrip('.') if common_prefix not in BLOCK_PATTERNS: - logger.error(f"auto-gptq format may not support loading this quantized model") + logger.error("auto-gptq format may not support loading this quantized model") quantization_config['block_name_to_quantize'] = common_prefix quantization_config.pop("to_quant_block_names", None) diff --git a/auto_round/export/export_to_autogptq/qlinear_triton.py b/auto_round/export/export_to_autogptq/qlinear_triton.py index c51920109..671737d3b 100644 --- a/auto_round/export/export_to_autogptq/qlinear_triton.py +++ b/auto_round/export/export_to_autogptq/qlinear_triton.py @@ -19,6 +19,7 @@ import torch.nn as nn import transformers + class TritonModuleMixin: @classmethod def warmup(cls, model, transpose=False, seqlen=2048): diff --git a/auto_round/export/export_to_autoround/export.py b/auto_round/export/export_to_autoround/export.py index 10da020a2..a0ac9ae8b 100644 --- a/auto_round/export/export_to_autoround/export.py +++ b/auto_round/export/export_to_autoround/export.py @@ -14,20 +14,28 @@ import copy +import inspect import json import os +from concurrent.futures import ThreadPoolExecutor +import threadpoolctl as tctl import torch import torch.nn as nn import transformers - -from auto_round.utils import get_module, logger, set_module, SUPPORTED_LAYER_TYPES, check_to_quantized, \ - filter_quantization_config, SUPPORTED_FORMATS -import threadpoolctl as tctl -import inspect from tqdm import tqdm -from concurrent.futures import ThreadPoolExecutor -from auto_round.utils import get_autogptq_packing_qlinear, check_start_with_block_name + +from auto_round.utils import ( + SUPPORTED_FORMATS, + SUPPORTED_LAYER_TYPES, + check_start_with_block_name, + check_to_quantized, + filter_quantization_config, + get_autogptq_packing_qlinear, + get_module, + logger, + set_module, +) def check_neq_config(config, data_type, bits, group_size, sym): @@ -83,7 +91,7 @@ def dynamic_import_quant_linear_for_packing(backend, bits, group_size, sym, act_ elif "awq" in backend: from ..export_to_awq.utils import WQLinear_GEMM return WQLinear_GEMM - elif "gptq" in backend and not "gptqmodel" in backend: ## have g_idx + elif "gptq" in backend and "gptqmodel" not in backend: ## have g_idx return get_autogptq_packing_qlinear(backend, bits, group_size, sym) else: raise ValueError( @@ -261,7 +269,7 @@ def save_quantized_as_autoround(output_dir, inplace=True, backend="auto_round:ex return save_quantized_as_autoround(output_dir, inplace=inplace, backend="auto_round", **kwargs) 
##if using sym, we change to gptq sym kernel to avoid compiling from auto_round source - if (kwargs.get("sym") is None or kwargs.get("sym") == True) and ("gptq" not in backend and "awq" not in backend): + if (kwargs.get("sym") is None or kwargs.get("sym")) and ("gptq" not in backend and "awq" not in backend): backend = backend.replace('auto_round', 'auto_round:auto_gptq') model = kwargs["model"] @@ -279,8 +287,8 @@ def save_quantized_as_autoround(output_dir, inplace=True, backend="auto_round:ex processor = kwargs.get("processor", None) extra_config = {} block_name_to_quantize = quantization_config["block_name_to_quantize"] - if isinstance(block_name_to_quantize, str): \ - block_name_to_quantize = block_name_to_quantize.split(",") + if isinstance(block_name_to_quantize, str): + block_name_to_quantize = block_name_to_quantize.split(",") elif isinstance(block_name_to_quantize, list): for i in range(len(block_name_to_quantize)): block_name_to_quantize[i] = os.path.commonprefix(block_name_to_quantize[i]).rstrip('.') diff --git a/auto_round/export/export_to_autoround/export_to_fp8_woq.py b/auto_round/export/export_to_autoround/export_to_fp8_woq.py index d29e021c2..02ef4d755 100644 --- a/auto_round/export/export_to_autoround/export_to_fp8_woq.py +++ b/auto_round/export/export_to_autoround/export_to_fp8_woq.py @@ -13,15 +13,24 @@ # limitations under the License. import copy -import os -import torch import json -from auto_round.utils import logger, set_module, SUPPORTED_LAYER_TYPES, check_to_quantized, \ - filter_quantization_config, get_module, check_start_with_block_name +import os +from concurrent.futures import ThreadPoolExecutor + import threadpoolctl as tctl +import torch import transformers from tqdm import tqdm -from concurrent.futures import ThreadPoolExecutor + +from auto_round.utils import ( + SUPPORTED_LAYER_TYPES, + check_start_with_block_name, + check_to_quantized, + filter_quantization_config, + get_module, + logger, + set_module, +) def check_neq_config(config, data_type, bits, group_size, sym): @@ -169,8 +178,8 @@ def save_quantized_as_autoround(output_dir, inplace=True, backend="auto_round", processor = kwargs.get("processor", None) extra_config = {} block_name_to_quantize = quantization_config["block_name_to_quantize"] - if isinstance(block_name_to_quantize, str): \ - block_name_to_quantize = block_name_to_quantize.split(",") + if isinstance(block_name_to_quantize, str): + block_name_to_quantize = block_name_to_quantize.split(",") elif isinstance(block_name_to_quantize, list): for i in range(len(block_name_to_quantize)): block_name_to_quantize[i] = os.path.commonprefix(block_name_to_quantize[i]).rstrip('.') diff --git a/auto_round/export/export_to_awq/export.py b/auto_round/export/export_to_awq/export.py index 4ef02662e..22c13048e 100644 --- a/auto_round/export/export_to_awq/export.py +++ b/auto_round/export/export_to_awq/export.py @@ -20,22 +20,28 @@ # SOFTWARE. 
-import os -import torch -import torch.nn as nn - -from auto_round.utils import (logger, get_module, - set_module, - check_to_quantized, - get_block_names, - extract_block_names_to_str, SUPPORTED_LAYER_TYPES, filter_quantization_config) import copy import json -from auto_round.export.export_to_awq.utils import WQLinear_GEMM +import os from concurrent.futures import ThreadPoolExecutor + import threadpoolctl as tctl +import torch +import torch.nn as nn from tqdm import tqdm +from auto_round.export.export_to_awq.utils import WQLinear_GEMM +from auto_round.utils import ( + SUPPORTED_LAYER_TYPES, + check_to_quantized, + extract_block_names_to_str, + filter_quantization_config, + get_block_names, + get_module, + logger, + set_module, +) + def pack_layer(name, model, backend): if name == "lm_head": ##dese not support lm-head diff --git a/auto_round/export/export_to_awq/utils.py b/auto_round/export/export_to_awq/utils.py index 64e826e9e..72e218df5 100644 --- a/auto_round/export/export_to_awq/utils.py +++ b/auto_round/export/export_to_awq/utils.py @@ -33,8 +33,9 @@ # SOFTWARE. import gc -import torch import warnings + +import torch import torch.nn as nn from torch.autograd import Function diff --git a/auto_round/export/export_to_gguf/config.py b/auto_round/export/export_to_gguf/config.py index d2b6a821e..e6b71586b 100644 --- a/auto_round/export/export_to_gguf/config.py +++ b/auto_round/export/export_to_gguf/config.py @@ -13,6 +13,7 @@ # limitations under the License. from enum import IntEnum + class ModelType(IntEnum): TEXT = 1 MMPROJ = 2 diff --git a/auto_round/export/export_to_gguf/convert.py b/auto_round/export/export_to_gguf/convert.py index 45b99d2f0..ea69b0bbd 100644 --- a/auto_round/export/export_to_gguf/convert.py +++ b/auto_round/export/export_to_gguf/convert.py @@ -34,29 +34,29 @@ from __future__ import annotations -import ast -import gc -import logging import argparse +import ast import contextlib +import gc import json +import logging +import math import os import re import sys -import psutil from enum import IntEnum -from pathlib import Path from hashlib import sha256 -from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast from itertools import chain +from pathlib import Path +from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast -import math import numpy as np +import psutil import torch from transformers import AutoConfig -from auto_round.utils import logger, LazyImport, get_module, clean_module_parameter from auto_round.export.export_to_gguf.packing import ggml_quant +from auto_round.utils import LazyImport, clean_module_parameter, get_module, logger gguf = LazyImport("gguf") @@ -4741,8 +4741,7 @@ def phantom(tok): def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: del bid # unused - if name.startswith("bert."): - name = name[5:] + name = name.removeprefix("bert.") if name.endswith(".gamma"): name = name[:-6] + ".weight" @@ -4798,6 +4797,7 @@ def _xlmroberta_set_vocab(self) -> None: raise FileNotFoundError(f"File not found: {tokenizer_path}") from base64 import b64decode + from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained(self.dir_model) @@ -4918,8 +4918,7 @@ def set_gguf_parameters(self): super().set_gguf_parameters() def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: - if name.startswith("distilbert."): - name = 
name[11:] + name = name.removeprefix("distilbert.") # These layers act as MLM head, so we don't need them if name.startswith("vocab_"): @@ -4960,8 +4959,7 @@ def set_vocab(self): def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: # if name starts with "roberta.", remove the prefix # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main - if name.startswith("roberta."): - name = name[8:] + name = name.removeprefix("roberta.") # position embeddings start at pad_token_id + 1, so just chop down the weight tensor if name == "embeddings.position_embeddings.weight": @@ -5076,8 +5074,7 @@ def modify_tensors(self, data_torch, name, bid): if name.startswith("decoder."): return [] - if name.startswith("model."): - name = name[6:] + name = name.removeprefix("model.") return super().modify_tensors(data_torch, name, bid) @@ -5096,8 +5093,7 @@ def set_vocab(self): def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: # if name starts with "roberta.", remove the prefix # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main - if name.startswith("roberta."): - name = name[8:] + name = name.removeprefix("roberta.") # position embeddings start at pad_token_id + 1, so just chop down the weight tensor if name == "embeddings.position_embeddings.weight": diff --git a/auto_round/export/export_to_gguf/export.py b/auto_round/export/export_to_gguf/export.py index 14b8d0993..e42b49f95 100644 --- a/auto_round/export/export_to_gguf/export.py +++ b/auto_round/export/export_to_gguf/export.py @@ -13,14 +13,23 @@ # limitations under the License. import os -import sys import shutil -import torch -from pathlib import Path +import sys import time +from pathlib import Path + +import torch + from auto_round.export.export_to_gguf.convert import ModelBase, ModelType, get_model_architecture -from auto_round.utils import logger, LazyImport, get_block_names, flatten_list, check_to_quantized, get_module, \ - clear_memory +from auto_round.utils import ( + LazyImport, + check_to_quantized, + clear_memory, + flatten_list, + get_block_names, + get_module, + logger, +) TMP_DIR_NAME = "tmp_dir" diff --git a/auto_round/export/export_to_gguf/packing.py b/auto_round/export/export_to_gguf/packing.py index b377fdf28..2dfe3b82b 100644 --- a/auto_round/export/export_to_gguf/packing.py +++ b/auto_round/export/export_to_gguf/packing.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import torch import numpy as np +import torch -from auto_round.export.export_to_gguf.config import QK_K, K_SCALE_SIZE, GGML_QUANT_SIZES +from auto_round.export.export_to_gguf.config import GGML_QUANT_SIZES, K_SCALE_SIZE, QK_K from auto_round.utils import get_reciprocal GGML_QUANT_TYPE = {} diff --git a/auto_round/export/export_to_itrex/export.py b/auto_round/export/export_to_itrex/export.py index 435eed66d..34aa1aed7 100644 --- a/auto_round/export/export_to_itrex/export.py +++ b/auto_round/export/export_to_itrex/export.py @@ -19,14 +19,14 @@ import torch import transformers + from auto_round.export.register import register_format -from auto_round.utils import get_module, logger, set_module, detect_device, check_to_quantized +from auto_round.utils import check_to_quantized, detect_device, get_module, logger, set_module from .config import QuantConfig from .model_wrapper import WeightOnlyLinear - def quant_weight_w_scale(weight, scale, zp, group_size=-1, device="cpu"): """Quant and dequant tensor with group size. diff --git a/auto_round/export/export_to_itrex/model_wrapper.py b/auto_round/export/export_to_itrex/model_wrapper.py index c8e985939..05d0ce45e 100644 --- a/auto_round/export/export_to_itrex/model_wrapper.py +++ b/auto_round/export/export_to_itrex/model_wrapper.py @@ -19,12 +19,13 @@ # since the model classes inherit torch.nn.Module. import math +import numpy as np import torch from packaging.version import Version from torch.autograd import Function from torch.nn import functional as F -import numpy as np -from auto_round.utils import logger, can_pack_with_numba + +from auto_round.utils import can_pack_with_numba, logger NF4 = [ -1.0, diff --git a/auto_round/export/export_to_llmcompressor/export.py b/auto_round/export/export_to_llmcompressor/export.py index a60338752..d9e5dc50d 100644 --- a/auto_round/export/export_to_llmcompressor/export.py +++ b/auto_round/export/export_to_llmcompressor/export.py @@ -13,10 +13,12 @@ # limitations under the License. 
import os + import torch -from auto_round.utils import get_module, logger, set_module, detect_device -from auto_round.wrapper import WrapperWALayer + from auto_round.export.export_to_llmcompressor.config import quantization_config +from auto_round.utils import detect_device, get_module, logger, set_module +from auto_round.wrapper import WrapperWALayer @torch.no_grad() diff --git a/auto_round/inference/auto_quantizer.py b/auto_round/inference/auto_quantizer.py index 3aea03e04..765a7e150 100644 --- a/auto_round/inference/auto_quantizer.py +++ b/auto_round/inference/auto_quantizer.py @@ -29,8 +29,9 @@ import importlib.util import warnings from dataclasses import dataclass +from enum import Enum from logging import getLogger -from typing import Any, Dict, Optional, Tuple, Union, List +from typing import Any, Dict, List, Optional, Tuple, Union import torch import torch.nn as nn @@ -39,9 +40,9 @@ from transformers.quantizers import AutoQuantizationConfig, HfQuantizer from transformers.quantizers.auto import AUTO_QUANTIZER_MAPPING from transformers.utils.quantization_config import AwqConfig, GPTQConfig, QuantizationConfigMixin, QuantizationMethod -from auto_round.utils import (is_hpu_supported) + from auto_round.inference.convert_model import convert_hf_model, infer_target_device, post_init -from enum import Enum +from auto_round.utils import is_hpu_supported logger = getLogger(__name__) import sys diff --git a/auto_round/inference/backend.py b/auto_round/inference/backend.py index d1ad65f97..47bffe6dd 100644 --- a/auto_round/inference/backend.py +++ b/auto_round/inference/backend.py @@ -14,7 +14,7 @@ import functools from dataclasses import dataclass, field -from typing import List, Any, Optional +from typing import Any, List, Optional from transformers.utils.versions import require_version @@ -308,7 +308,7 @@ def check_compatible(backend_name, device, bits, group_size, sym, packing_format backend = BackendInfos[backend_name] # Check if device is supported by the backend - if not device in backend.device: + if device not in backend.device: return False # Check if bit-width is supported diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py index 1e6a565b5..6004e8acc 100644 --- a/auto_round/inference/convert_model.py +++ b/auto_round/inference/convert_model.py @@ -15,19 +15,31 @@ import re from logging import getLogger from typing import Union -from tqdm import tqdm + import torch import torch.nn as nn - +from tqdm import tqdm from transformers.pytorch_utils import Conv1D -from auto_round.utils import ( - get_module, set_module, is_hpu_supported, get_block_names, find_matching_blocks, - get_layer_names_in_block, check_to_quantized, check_start_with_block_name, SUPPORTED_LAYER_TYPES) - from auto_round.inference.backend import ( - get_layer_backend, dynamic_import_inference_linear, find_backend, BackendInfos, get_highest_priority_backend, - process_requirement) + BackendInfos, + dynamic_import_inference_linear, + find_backend, + get_highest_priority_backend, + get_layer_backend, + process_requirement, +) +from auto_round.utils import ( + SUPPORTED_LAYER_TYPES, + check_start_with_block_name, + check_to_quantized, + find_matching_blocks, + get_block_names, + get_layer_names_in_block, + get_module, + is_hpu_supported, + set_module, +) logger = getLogger(__name__) @@ -69,7 +81,9 @@ def get_keys_to_not_convert(model): Input model """ from copy import deepcopy + from accelerate.utils import find_tied_parameters + # Create a copy of the model and tie the weights, then # 
check if it contains tied weights tied_model = deepcopy(model) # this has 0 cost since it is done inside `init_empty_weights` context manager` @@ -508,7 +522,7 @@ def convert_hf_model(model: nn.Module, target_device="cpu"): quantization_config = model.config.quantization_config - if hasattr(quantization_config, "desc_act") and quantization_config.desc_act == True: + if hasattr(quantization_config, "desc_act") and quantization_config.desc_act: ##check static_group if (hasattr(quantization_config, "static_groups") and not quantization_config.static_groups) or ( not hasattr(quantization_config, "static_groups")): diff --git a/auto_round/low_cpu_mem/load.py b/auto_round/low_cpu_mem/load.py index 1ff291b16..44a8f2b0b 100644 --- a/auto_round/low_cpu_mem/load.py +++ b/auto_round/low_cpu_mem/load.py @@ -33,7 +33,6 @@ ) from ..low_cpu_mem import modified_pickle as pickle - from .utils import torch torch_version = torch.__version__.split("+")[0] diff --git a/auto_round/low_cpu_mem/utils.py b/auto_round/low_cpu_mem/utils.py index f4165b4eb..715556f0f 100644 --- a/auto_round/low_cpu_mem/utils.py +++ b/auto_round/low_cpu_mem/utils.py @@ -16,13 +16,13 @@ # limitations under the License. """Utils for layer wise quantization.""" -import os import gc import json -import pickle -from functools import partial import logging +import os +import pickle from collections import OrderedDict +from functools import partial import torch from accelerate import init_empty_weights @@ -30,9 +30,10 @@ from transformers import AutoConfig, AutoModelForCausalLM from transformers.models.auto.auto_factory import _BaseAutoModelClass -from .load import load from auto_round.utils import detect_device +from .load import load + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(filename)s L%(lineno)d: %(message)s") logger = logging.getLogger("low_cpu_mem_tools") diff --git a/auto_round/mllm/autoround_mllm.py b/auto_round/mllm/autoround_mllm.py index 5ab25e92d..ed6d06611 100644 --- a/auto_round/mllm/autoround_mllm.py +++ b/auto_round/mllm/autoround_mllm.py @@ -12,29 +12,29 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Union -from tqdm import tqdm from copy import deepcopy +from typing import Union import torch +from tqdm import tqdm + +from auto_round.special_model_handler import SUPPORT_ONLY_TEXT_MODELS, _handle_special_model +from ..autoround import AutoRound +from ..low_cpu_mem.utils import get_layers_before_block from ..utils import ( - logger, + clear_memory, detect_device, + extract_block_names_to_str, + find_matching_blocks, + get_block_names, + logger, + mllm_load_model, to_device, to_dtype, - get_block_names, - find_matching_blocks, - extract_block_names_to_str, - clear_memory, - mllm_load_model ) -from ..autoround import AutoRound -from .template import get_template, Template -from auto_round.special_model_handler import SUPPORT_ONLY_TEXT_MODELS, _handle_special_model from .mllm_dataset import get_mllm_dataloader -from ..low_cpu_mem.utils import get_layers_before_block - +from .template import Template, get_template def _only_text_test(model, tokenizer, device, model_type): @@ -198,7 +198,7 @@ def __init__( (dataset in CALIB_DATASETS.keys() and not \ _only_text_test(model, tokenizer, device, self.template.model_type)): if quant_nontext_module: - logger.warning(f"Text only dataset cannot be used for calibrating non-text modules," + logger.warning("Text only dataset cannot be used for calibrating non-text modules," "switching to liuhaotian/llava_conv_58k") else: logger.warning(f"{model.config.model_type} not support for {dataset}," diff --git a/auto_round/mllm/eval.py b/auto_round/mllm/eval.py index 997dea72d..dfe4aaa52 100644 --- a/auto_round/mllm/eval.py +++ b/auto_round/mllm/eval.py @@ -46,14 +46,15 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +import json import os import time -import json from functools import partial -import pandas as pd -from ..utils import logger, LazyImport import numpy as np +import pandas as pd + +from ..utils import LazyImport, logger vlmeval = LazyImport("vlmeval") diff --git a/auto_round/mllm/mllm_dataset.py b/auto_round/mllm/mllm_dataset.py index 18b7d0fd5..953c3e0c2 100644 --- a/auto_round/mllm/mllm_dataset.py +++ b/auto_round/mllm/mllm_dataset.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import json +import os from typing import Dict import torch -from torch.utils.data import Dataset, DataLoader +from torch.utils.data import DataLoader, Dataset from transformers import set_seed -from .utils import _extract_data_dir -from .template import Template -from ..utils import logger from ..special_model_handler import check_mllm_model_batch +from ..utils import logger +from .template import Template +from .utils import _extract_data_dir MLLM_DATASET: Dict[str, Dataset] = {} @@ -250,7 +250,7 @@ def get_mllm_dataloader( tokenizer, seqlen, dataset, seed, bs, nsamples) if quant_nontext_module: logger.error( - f"Text only dataset cannot be used for calibrating non-text modules," + "Text only dataset cannot be used for calibrating non-text modules," " switching to liuhaotian/llava_conv_58k") exit(-1) return dataloader, bs, gradient_accumulate_steps diff --git a/auto_round/mllm/template.py b/auto_round/mllm/template.py index 0782fd09c..1794cca06 100644 --- a/auto_round/mllm/template.py +++ b/auto_round/mllm/template.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os import json +import os from dataclasses import dataclass -from typing import Dict, Optional, List from enum import Enum, unique +from typing import Dict, List, Optional from ..utils import logger -from .processor import BasicProcessor, PROCESSORS +from .processor import PROCESSORS, BasicProcessor TEMPLATES: Dict[str, "Template"] = {} diff --git a/auto_round/mllm/utils.py b/auto_round/mllm/utils.py index eebb2a189..90e67e855 100644 --- a/auto_round/mllm/utils.py +++ b/auto_round/mllm/utils.py @@ -13,6 +13,7 @@ # limitations under the License. import os + import requests from ..utils import LazyImport diff --git a/auto_round/script/llm.py b/auto_round/script/llm.py index 8faea4e8f..7463dbcc4 100644 --- a/auto_round/script/llm.py +++ b/auto_round/script/llm.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +import logging + # Copyright (c) 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,16 +30,16 @@ # limitations under the License. import os import re -import argparse import sys -import logging + from auto_round.utils import ( - get_fp_layer_names, clear_memory, get_device_and_parallelism, + get_fp_layer_names, get_model_dtype, + set_cuda_visible_devices, str2bool, - set_cuda_visible_devices) +) os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" @@ -341,11 +344,9 @@ def tune(args): args.eval_bs = "auto" import transformers + from transformers import AutoConfig, AutoModel, AutoModelForCausalLM, AutoTokenizer - from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, AutoConfig - - from auto_round.utils import detect_device, get_library_version - from auto_round.utils import logger + from auto_round.utils import detect_device, get_library_version, logger if args.format is None: args.format = "auto_round" @@ -427,8 +428,8 @@ def tune(args): if lm_head_layer_name in item: ##TODO extend to encoder-decoder layer, seq classification model args.quant_lm_head = False logger.warning( - f"reset `quant_lm_head` to `False` as quantizing lm_head with tied weights has not been " - f"supported currently") + "reset `quant_lm_head` to `False` as quantizing lm_head with tied weights has not been " + "supported currently") break if args.quant_lm_head: @@ -670,8 +671,8 @@ def eval(args): eval_model_dtype = get_model_dtype(args.eval_model_dtype) if is_gguf_file: import torch - from transformers import AutoTokenizer, AutoModelForCausalLM from lm_eval.utils import make_table # pylint: disable=E0401 + from transformers import AutoModelForCausalLM, AutoTokenizer tokenizer = AutoTokenizer.from_pretrained(model, gguf_file=gguf_file) logger.warning("evaluating gguf model is an experimental feature, the accuracy may be not correct.") @@ -719,12 +720,13 @@ def eval_task_by_task( # load after _eval_int in order to make sure import torch after set CUDA_VISBILE_DEVICES import traceback - from auto_round.utils import logger + from lm_eval import simple_evaluate as lm_simple_evaluate from lm_eval.models.huggingface import HFLM from transformers import AutoModelForCausalLM, AutoTokenizer from auto_round import AutoRoundConfig # pylint: disable=E0611 + from auto_round.utils import logger if batch_size is None: batch_size = "auto:8" is_gguf_file = False diff --git a/auto_round/script/mllm.py b/auto_round/script/mllm.py index 2e913fd92..dabbc4014 100644 --- a/auto_round/script/mllm.py +++ b/auto_round/script/mllm.py @@ -12,17 +12,17 @@ # See the License for the 
specific language governing permissions and # limitations under the License. +import argparse import os import sys -import argparse from auto_round.utils import ( - get_fp_layer_names, clear_memory, - is_debug_mode, get_device_and_parallelism, - set_cuda_visible_devices, + get_fp_layer_names, + is_debug_mode, logger, + set_cuda_visible_devices, ) os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" @@ -284,8 +284,7 @@ def setup_lmeval_parser(): def tune(args): import transformers - - from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, AutoProcessor + from transformers import AutoConfig, AutoModelForCausalLM, AutoProcessor, AutoTokenizer if args.format is None: args.format = "auto_round" @@ -377,8 +376,8 @@ def tune(args): if lm_head_layer_name in item: ##TODO extend to encoder-decoder layer, seq classification model args.quant_lm_head = False print( - f"warning, disable quant_lm_head as quantizing lm_head with tied weights has not been " - f"supported currently") + "warning, disable quant_lm_head as quantizing lm_head with tied weights has not been " + "supported currently") break if args.quant_lm_head: layer_config[lm_head_layer_name] = {"bits": args.bits} @@ -390,8 +389,8 @@ def tune(args): if args.quant_lm_head and args.low_gpu_mem_usage: print( - f"warning, low_gpu_mem_usage=False is strongly recommended if the whole model could be loaded to " - f"gpu") + "warning, low_gpu_mem_usage=False is strongly recommended if the whole model could be loaded to " + "gpu") if "--truncation" not in sys.argv: args.truncation = None diff --git a/auto_round/testing_utils.py b/auto_round/testing_utils.py index be856083c..c859c6188 100644 --- a/auto_round/testing_utils.py +++ b/auto_round/testing_utils.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import importlib.util +import unittest import torch - from transformers.utils.versions import require_version + def is_gguf_available(): return importlib.util.find_spec("gguf") is not None diff --git a/auto_round/utils.py b/auto_round/utils.py index f64c8fb41..988af3db8 100644 --- a/auto_round/utils.py +++ b/auto_round/utils.py @@ -13,23 +13,24 @@ # limitations under the License. import copy +import gc import logging import os -import sys +import re import subprocess +import sys from collections import UserDict -import re +from functools import lru_cache + import cpuinfo import psutil import torch +import transformers +from packaging import version from torch.amp import autocast -from functools import lru_cache -from packaging import version -import gc +from auto_round.export.export_to_gguf.config import GGML_QUANT_SIZES, GGUF_CONFIG, GGUF_INNER_CONFIG, QK_K, ModelType from auto_round.special_model_handler import SPECIAL_MULTIMODAL_BLOCK, SPECIAL_SHARED_CACHE_KEYS -import transformers -from auto_round.export.export_to_gguf.config import GGUF_CONFIG, GGML_QUANT_SIZES, GGUF_INNER_CONFIG, QK_K, ModelType SHARED_CACHE_KEYS = ("position_ids", "cache_position", "position_embeddings") @@ -108,6 +109,7 @@ def format(self, record): logger.addHandler(fh) import importlib + import transformers @@ -1094,7 +1096,7 @@ def check_awq_gemm_compatibility(model, bits, group_size, sym, layer_configs=Non - str: An error message describing why the model is incompatible, or an empty string if compatible. 
""" if bits != 4: - return False, f"AutoAWQ GEMM kernel only supports 4 bits" + return False, "AutoAWQ GEMM kernel only supports 4 bits" for n, m in model.named_modules(): if isinstance(m, transformers.pytorch_utils.Conv1D): return False, "AutoAWQ GEMM kernel does not support conv1d" @@ -1174,9 +1176,10 @@ def get_layer_features(layer): def _gguf_args_check(args_or_ar, format_str=None, model_type=ModelType.TEXT): - from auto_round.utils import logger import argparse + from auto_round.utils import logger + if format_str is None: args_or_ar.format = args_or_ar.format.replace("q*_", f"q{args_or_ar.bits}_") format_str = args_or_ar.format @@ -1209,6 +1212,7 @@ def _gguf_args_check(args_or_ar, format_str=None, model_type=ModelType.TEXT): if isinstance(args_or_ar.model, str) and os.path.isdir(args_or_ar.model): from pathlib import Path + from auto_round.export.export_to_gguf.convert import ModelBase hparams = ModelBase.load_hparams(Path(args_or_ar.model)) model_architecture = hparams["architectures"][0] @@ -1275,7 +1279,7 @@ def llm_load_model( low_cpu_mem_mode=0, low_cpu_mem_tmp_dir=None, **kwargs): - from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM + from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer is_glm = bool(re.search("chatglm", pretrained_model_name_or_path.lower())) low_cpu_mem_usage = False @@ -1342,9 +1346,10 @@ def mllm_load_model( model_dtype=None, **kwargs): import json + import transformers - from transformers import AutoProcessor, AutoTokenizer, AutoModelForCausalLM, AutoModel - from huggingface_hub import HfApi, hf_hub_download, HfFileSystem + from huggingface_hub import HfApi, HfFileSystem, hf_hub_download + from transformers import AutoModel, AutoModelForCausalLM, AutoProcessor, AutoTokenizer if os.path.isdir(pretrained_model_name_or_path): config = json.load(open(os.path.join(pretrained_model_name_or_path, "config.json"))) @@ -1372,7 +1377,7 @@ def mllm_load_model( processor, image_processor = None, None if "deepseek_vl_v2" == model_type: - from deepseek_vl2.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM # pylint: disable=E0401 + from deepseek_vl2.models import DeepseekVLV2ForCausalLM, DeepseekVLV2Processor # pylint: disable=E0401 processor = DeepseekVLV2Processor.from_pretrained(pretrained_model_name_or_path) tokenizer = processor.tokenizer model: DeepseekVLV2ForCausalLM = AutoModelForCausalLM.from_pretrained( @@ -1628,6 +1633,7 @@ def get_layer_config_by_gguf_format(layer_config, gguf_format, model, model_type target_gguf_format = next((fmt for fmt in gguf_format if fmt != "fake"), None) import gguf # pylint: disable=E0401 + from auto_round.export.export_to_gguf.convert import ModelBase, get_model_architecture model_architecture = get_model_architecture(hparams=model.config.to_dict(), model_type=model_type) try: @@ -1912,7 +1918,7 @@ def get_gguf_qtype_by_layer_config(layer_config): return gguf.GGMLQuantizationType.Q6_K if bits == 8 and sym and group_size == 32: return gguf.GGMLQuantizationType.Q8_0 - raise ValueError(f"Unknown layer config") + raise ValueError("Unknown layer config") def flatten_list(nested_list): flattened = [] diff --git a/auto_round/wrapper.py b/auto_round/wrapper.py index a226daeed..12eb573fc 100644 --- a/auto_round/wrapper.py +++ b/auto_round/wrapper.py @@ -13,15 +13,12 @@ # limitations under the License. 
import torch -from torch.functional import F import transformers +from torch.functional import F + from auto_round.data_type import get_quant_func -from .utils import ( - check_to_quantized, - get_scale_shape, - set_module, - logger, SUPPORTED_LAYER_TYPES -) + +from .utils import SUPPORTED_LAYER_TYPES, check_to_quantized, get_scale_shape, logger, set_module def reshape_and_pad_tensor(v, group_size=-1): diff --git a/auto_round_extension/__init__.py b/auto_round_extension/__init__.py index e69de29bb..2b414eb72 100644 --- a/auto_round_extension/__init__.py +++ b/auto_round_extension/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/auto_round_extension/cuda/__init__.py b/auto_round_extension/cuda/__init__.py index e69de29bb..2b414eb72 100644 --- a/auto_round_extension/cuda/__init__.py +++ b/auto_round_extension/cuda/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/auto_round_extension/cuda/gptqmodel_marlin.py b/auto_round_extension/cuda/gptqmodel_marlin.py index 242037d9b..42bc2c75a 100644 --- a/auto_round_extension/cuda/gptqmodel_marlin.py +++ b/auto_round_extension/cuda/gptqmodel_marlin.py @@ -24,13 +24,13 @@ def get_marlin_layer(): ##use an ugly wrapper to import gptqmodel on demand - from gptqmodel.models._const import DEVICE, PLATFORM # pylint: disable=E0401 - from gptqmodel.nn_modules.qlinear import BaseQuantLinear # pylint: disable=E0401 - from gptqmodel.utils.backend import BACKEND # pylint: disable=E0401 + from gptqmodel.models._const import DEVICE, PLATFORM # pylint: disable=E0401 + from gptqmodel.nn_modules.qlinear import BaseQuantLinear # pylint: disable=E0401 + from gptqmodel.utils.backend import BACKEND # pylint: disable=E0401 marlin_import_exception = None try: - import gptqmodel_marlin_kernels # pylint: disable=E0401 + import gptqmodel_marlin_kernels # pylint: disable=E0401 except ImportError as e: marlin_import_exception = e diff --git a/auto_round_extension/hpu/__init__.py b/auto_round_extension/hpu/__init__.py index e69de29bb..2b414eb72 100644 --- a/auto_round_extension/hpu/__init__.py +++ b/auto_round_extension/hpu/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/auto_round_extension/hpu/qlinear_hpu.py b/auto_round_extension/hpu/qlinear_hpu.py index 9a6eb1490..5bcd06927 100644 --- a/auto_round_extension/hpu/qlinear_hpu.py +++ b/auto_round_extension/hpu/qlinear_hpu.py @@ -40,6 +40,7 @@ import torch import torch.nn as nn import transformers + try: import habana_frameworks.torch.core as htcore convert_from_uint4 = torch.ops.hpu.convert_from_uint4 diff --git a/auto_round_extension/hpu/qlinear_hpu_gptq.py b/auto_round_extension/hpu/qlinear_hpu_gptq.py index c2f3a884e..0016ff032 100644 --- a/auto_round_extension/hpu/qlinear_hpu_gptq.py +++ b/auto_round_extension/hpu/qlinear_hpu_gptq.py @@ -40,6 +40,7 @@ import torch import torch.nn as nn import transformers + try: import habana_frameworks.torch.core as htcore convert_from_uint4 = torch.ops.hpu.convert_from_uint4 diff --git a/auto_round_extension/ipex/__init__.py b/auto_round_extension/ipex/__init__.py index 2d32570d3..2929160d8 100644 --- a/auto_round_extension/ipex/__init__.py +++ b/auto_round_extension/ipex/__init__.py @@ -1,3 +1,17 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from auto_round_extension.ipex.qlinear_ipex_awq import QuantLinear as IpexAWQQuantLinear from auto_round_extension.ipex.qlinear_ipex_gptq import ( QuantLinear as IpexGPTQQuantLinear, diff --git a/auto_round_extension/ipex/qlinear_ipex_awq.py b/auto_round_extension/ipex/qlinear_ipex_awq.py index b03d2104f..83afdb2ff 100644 --- a/auto_round_extension/ipex/qlinear_ipex_awq.py +++ b/auto_round_extension/ipex/qlinear_ipex_awq.py @@ -1,6 +1,21 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import torch import torch.nn as nn + class QuantLinear(nn.Module): QUANT_TYPE = "ipex_awq" def __init__(self, w_bit, group_size, in_features, out_features, bias, dev): diff --git a/auto_round_extension/ipex/qlinear_ipex_gptq.py b/auto_round_extension/ipex/qlinear_ipex_gptq.py index 042f0226f..e308ec5b9 100644 --- a/auto_round_extension/ipex/qlinear_ipex_gptq.py +++ b/auto_round_extension/ipex/qlinear_ipex_gptq.py @@ -1,12 +1,25 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math +from logging import getLogger import numpy as np import torch import torch.nn as nn import transformers -from logging import getLogger - logger = getLogger(__name__) BITS_DTYPE_MAPPING = { @@ -100,9 +113,13 @@ def post_init(self): # if not self.training and IPEX_AVAILABLE: if not self.training: import intel_extension_for_pytorch as ipex - from intel_extension_for_pytorch.nn.modules.weight_only_quantization import WeightOnlyQuantizedLinear, \ - QuantDtype, QuantMethod + from intel_extension_for_pytorch.nn.modules.weight_only_quantization import ( + QuantDtype, + QuantMethod, + WeightOnlyQuantizedLinear, + ) from packaging import version + from auto_round.utils import get_library_version ipex_version = get_library_version("intel_extension_for_pytorch") if version.parse(ipex_version) >= version.parse("2.5"): diff --git a/auto_round_extension/qbits/__init__.py b/auto_round_extension/qbits/__init__.py index b4594b642..04cea3305 100644 --- a/auto_round_extension/qbits/__init__.py +++ b/auto_round_extension/qbits/__init__.py @@ -1,3 +1,17 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from auto_round_extension.qbits.qlinear_qbits import QuantLinear as QBitsQuantLinear from auto_round_extension.qbits.qlinear_qbits_gptq import ( QuantLinear as QBitsGPTQQuantLinear, diff --git a/auto_round_extension/qbits/qbits_awq.py b/auto_round_extension/qbits/qbits_awq.py index 0b3adefb0..07b3012ba 100644 --- a/auto_round_extension/qbits/qbits_awq.py +++ b/auto_round_extension/qbits/qbits_awq.py @@ -1,5 +1,20 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch import torch.nn as nn + AWQ_REVERSE_ORDER = [0, 4, 1, 5, 2, 6, 3, 7] def unpack_awq(qweight: torch.Tensor, qzeros: torch.Tensor, bits: int): shifts = torch.arange(0, 32, bits, device="cpu") diff --git a/auto_round_extension/qbits/qlinear_qbits.py b/auto_round_extension/qbits/qlinear_qbits.py index aee384f32..2dee2d506 100644 --- a/auto_round_extension/qbits/qlinear_qbits.py +++ b/auto_round_extension/qbits/qlinear_qbits.py @@ -18,7 +18,9 @@ import numpy as np import torch import torch.nn as nn + from auto_round.utils import convert_dtype_torch2str, logger + QBITS_AVAILABLE = True BITS_DTYPE_MAPPING = { diff --git a/auto_round_extension/qbits/qlinear_qbits_gptq.py b/auto_round_extension/qbits/qlinear_qbits_gptq.py index 8a1211955..e67bf6ef5 100644 --- a/auto_round_extension/qbits/qlinear_qbits_gptq.py +++ b/auto_round_extension/qbits/qlinear_qbits_gptq.py @@ -18,7 +18,9 @@ import numpy as np import torch import torch.nn as nn + from auto_round.utils import convert_dtype_torch2str, logger + QBITS_AVAILABLE = True BITS_DTYPE_MAPPING = { diff --git a/auto_round_extension/torch/__init__.py b/auto_round_extension/torch/__init__.py index e69de29bb..2b414eb72 100644 --- a/auto_round_extension/torch/__init__.py +++ b/auto_round_extension/torch/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/auto_round_extension/torch/qlinear_torch.py b/auto_round_extension/torch/qlinear_torch.py index 066ba5beb..401ce149e 100644 --- a/auto_round_extension/torch/qlinear_torch.py +++ b/auto_round_extension/torch/qlinear_torch.py @@ -20,7 +20,6 @@ import torch.nn as nn import transformers - logger = getLogger(__name__) diff --git a/auto_round_extension/torch/qlinear_torch_zp.py b/auto_round_extension/torch/qlinear_torch_zp.py index a274f57be..503e40e49 100644 --- a/auto_round_extension/torch/qlinear_torch_zp.py +++ b/auto_round_extension/torch/qlinear_torch_zp.py @@ -21,7 +21,6 @@ import torch.nn as nn import transformers - logger = getLogger(__name__) diff --git a/auto_round_extension/triton/__init__.py b/auto_round_extension/triton/__init__.py index e69de29bb..2b414eb72 100644 --- a/auto_round_extension/triton/__init__.py +++ b/auto_round_extension/triton/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/auto_round_extension/triton/qlinear_tritonv2.py b/auto_round_extension/triton/qlinear_tritonv2.py index b9d8931b9..7320ba03f 100644 --- a/auto_round_extension/triton/qlinear_tritonv2.py +++ b/auto_round_extension/triton/qlinear_tritonv2.py @@ -1,3 +1,17 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from logging import getLogger @@ -6,17 +20,15 @@ import torch.nn as nn import transformers - from auto_round_extension.triton.triton_utils.mixin import TritonModuleMixin - logger = getLogger(__name__) try: from auto_round_extension.triton.triton_utils.dequant import QuantLinearFunction, quant_matmul_248 except ImportError as e: if torch.xpu.is_available(): - logger.error(f"please make sure your triton version is same with `pytorch-triton-xpu` library ") + logger.error("please make sure your triton version is same with `pytorch-triton-xpu` library ") exit(-1) triton_import_exception = e diff --git a/auto_round_extension/triton/qlinear_tritonv2_zp.py b/auto_round_extension/triton/qlinear_tritonv2_zp.py index 530e14d8d..af18f27b0 100644 --- a/auto_round_extension/triton/qlinear_tritonv2_zp.py +++ b/auto_round_extension/triton/qlinear_tritonv2_zp.py @@ -1,3 +1,17 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import math from logging import getLogger @@ -6,17 +20,15 @@ import torch.nn as nn import transformers - from auto_round_extension.triton.triton_utils_zp.mixin import TritonModuleMixin - logger = getLogger(__name__) try: from auto_round_extension.triton.triton_utils_zp.dequant import QuantLinearFunction, quant_matmul_248 except ImportError as e: if torch.xpu.is_available(): - logger.error(f"please make sure your triton version is same with `pytorch-triton-xpu` library ") + logger.error("please make sure your triton version is same with `pytorch-triton-xpu` library ") exit(-1) triton_import_exception = e diff --git a/auto_round_extension/triton/triton_utils/custom_autotune.py b/auto_round_extension/triton/triton_utils/custom_autotune.py index b511579cc..5b5b5b14d 100644 --- a/auto_round_extension/triton/triton_utils/custom_autotune.py +++ b/auto_round_extension/triton/triton_utils/custom_autotune.py @@ -40,7 +40,6 @@ import triton - # code based https://github.com/fpgaminer/GPTQ-triton """ Mostly the same as the autotuner in Triton, but with a few changes like using 40 runs instead of 100. diff --git a/auto_round_extension/triton/triton_utils/kernels.py b/auto_round_extension/triton/triton_utils/kernels.py index eebd29cdb..0fb42ac45 100644 --- a/auto_round_extension/triton/triton_utils/kernels.py +++ b/auto_round_extension/triton/triton_utils/kernels.py @@ -43,7 +43,6 @@ from . import custom_autotune - logger = getLogger(__name__) diff --git a/auto_round_extension/triton/triton_utils_zp/custom_autotune.py b/auto_round_extension/triton/triton_utils_zp/custom_autotune.py index b511579cc..5b5b5b14d 100644 --- a/auto_round_extension/triton/triton_utils_zp/custom_autotune.py +++ b/auto_round_extension/triton/triton_utils_zp/custom_autotune.py @@ -40,7 +40,6 @@ import triton - # code based https://github.com/fpgaminer/GPTQ-triton """ Mostly the same as the autotuner in Triton, but with a few changes like using 40 runs instead of 100. diff --git a/auto_round_extension/triton/triton_utils_zp/kernels.py b/auto_round_extension/triton/triton_utils_zp/kernels.py index f361a3dde..2d4a358aa 100644 --- a/auto_round_extension/triton/triton_utils_zp/kernels.py +++ b/auto_round_extension/triton/triton_utils_zp/kernels.py @@ -42,7 +42,6 @@ from . import custom_autotune - logger = getLogger(__name__) diff --git a/pyproject.toml b/pyproject.toml index 60bdae880..5fe81084a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,97 @@ [tool.codespell] -ignore-words = ".azure-pipelines/scripts/codeScan/codespell/autoround_dict.txt" \ No newline at end of file +ignore-words = ".azure-pipelines/scripts/codeScan/codespell/autoround_dict.txt" + +[tool.isort] +profile = "black" +line_length = 120 +known_first_party = ["auto_round"] +extend_skip_glob = ["**/__init__.py"] + +[tool.ruff] +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", +] + +# Same as Black. +line-length = 120 +indent-width = 4 + +# Assume Python 3.10 +target-version = "py310" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or +# McCabe complexity (`C901`) by default. 
+select = ["E4", "E7", "E9", "F", "NPY", "FURB"] +ignore = [ + "E402", # Module level import not at top of file + "E501", # Line too long (121 > 120 characters) + "E721", # Do not compare types, use isinstance() + "E722", # Do not use bare except + "E731", # Do not assign a lambda expression, use a def + "E741", # Do not use variables named ‘l’, ‘O’, or ‘I’ + "F401", # {name} imported but unused + "F403", # from {name} import * used; unable to detect undefined names + "F841", # Local variable is assigned to but never used{name} +] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" + +# Enable auto-formatting of code examples in docstrings. Markdown, +# reStructuredText code/literal blocks and doctests are all supported. +# +# This is currently disabled by default, but it is planned for this +# to be opt-out in the future. +docstring-code-format = false + +# Set the line length limit used when formatting code snippets in +# docstrings. +# +# This only has an effect when the `docstring-code-format` setting is +# enabled. +docstring-code-line-length = "dynamic" diff --git a/setup.py b/setup.py index a725e32e6..1d21ef8d5 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,10 @@ -import re -from io import open import os -from setuptools import find_packages, setup +import re import sys from functools import lru_cache +from io import open + +from setuptools import find_packages, setup os.environ["CC"] = "g++" os.environ["CXX"] = "g++" @@ -67,7 +68,7 @@ def is_cpu_env(): def fetch_requirements(path): requirements = [] with open(path, "r") as fd: - requirements = [r.strip() for r in fd.readlines()] + requirements = [r.strip() for r in fd] return requirements diff --git a/test/test_cpu/test_auto_round_hpu_only.py b/test/test_cpu/test_auto_round_hpu_only.py index 006e44f63..a6fdb433a 100644 --- a/test/test_cpu/test_auto_round_hpu_only.py +++ b/test/test_cpu/test_auto_round_hpu_only.py @@ -1,14 +1,15 @@ import pytest import torch -from auto_round.utils import is_hpu_supported - from _test_helpers import is_pytest_mode_compile, is_pytest_mode_lazy +from auto_round.utils import is_hpu_supported + def run_opt_125m_on_hpu(): - from auto_round import AutoRound from transformers import AutoModelForCausalLM, AutoTokenizer + from auto_round import AutoRound + model_name = "facebook/opt-125m" model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True) tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) @@ -24,7 +25,7 @@ def run_opt_125m_on_hpu(): seqlen=2, ) q_model, qconfig = autoround.quantize() - assert q_model is not None, f"Expected q_model to be not None" + assert q_model is not None, "Expected q_model to be not None" @pytest.mark.skipif(not is_hpu_supported(), reason="HPU is not supported") @@ -42,8 +43,7 @@ def test_opt_125m_compile_mode(): def test_import(): from auto_round import AutoRound - from auto_round.export.export_to_itrex.export import ( - WeightOnlyLinear, save_quantized_as_itrex) + from auto_round.export.export_to_itrex.export 
import WeightOnlyLinear, save_quantized_as_itrex @pytest.mark.parametrize( @@ -51,9 +51,10 @@ def test_import(): ["fp8_to_int_sym"], ) def test_w4a8(data_type): - from auto_round import AutoRound from transformers import AutoModelForCausalLM, AutoTokenizer + from auto_round import AutoRound + model_name = "facebook/opt-125m" model = AutoModelForCausalLM.from_pretrained( model_name, @@ -76,4 +77,4 @@ def test_w4a8(data_type): act_dynamic=False, ) q_model, qconfig = autoround.quantize() - assert q_model is not None, f"Expected q_model to be not None" + assert q_model is not None, "Expected q_model to be not None" diff --git a/test/test_cpu/test_autoopt.py b/test/test_cpu/test_autoopt.py index 6a986706d..536906e8f 100644 --- a/test/test_cpu/test_autoopt.py +++ b/test/test_cpu/test_autoopt.py @@ -8,7 +8,7 @@ import transformers from transformers import AutoModelForCausalLM, AutoTokenizer -from auto_round import AutoRoundOPT, AutoRoundAdam +from auto_round import AutoRoundAdam, AutoRoundOPT class LLMDataLoader: diff --git a/test/test_cpu/test_autoround.py b/test/test_cpu/test_autoround.py index 25b5afe14..fc577e319 100644 --- a/test/test_cpu/test_autoround.py +++ b/test/test_cpu/test_autoround.py @@ -7,10 +7,10 @@ sys.path.insert(0, "../..") import torch -from transformers import AutoModelForCausalLM, AutoTokenizer, AutoRoundConfig +from _test_helpers import model_infer +from transformers import AutoModelForCausalLM, AutoRoundConfig, AutoTokenizer from auto_round import AutoRound -from _test_helpers import model_infer class LLMDataLoader: @@ -423,7 +423,8 @@ def test_fallback_layers(self): def test_not_convert_modules(self): import requests from PIL import Image - from transformers import Qwen2VLForConditionalGeneration, AutoProcessor + from transformers import AutoProcessor, Qwen2VLForConditionalGeneration + from auto_round_extension.ipex.qlinear_ipex_awq import QuantLinear model_name = "Qwen/Qwen2-VL-2B-Instruct-AWQ" quantization_config = AutoRoundConfig() diff --git a/test/test_cpu/test_autoround_acc.py b/test/test_cpu/test_autoround_acc.py index 1158e9be3..656064907 100644 --- a/test/test_cpu/test_autoround_acc.py +++ b/test/test_cpu/test_autoround_acc.py @@ -3,14 +3,18 @@ import shutil import sys import unittest + sys.path.insert(0, "../..") +from math import isclose + import torch import transformers -from math import isclose from transformers import AutoModelForCausalLM, AutoTokenizer + from auto_round import AutoRound # pylint: disable=E0401 from auto_round.export.export_to_itrex.export import pack_model # pylint: disable=E0401 + class LLMDataLoader: def __init__(self): self.batch_size = 1 diff --git a/test/test_cpu/test_block_names.py b/test/test_cpu/test_block_names.py index 6f1436315..f7555a944 100644 --- a/test/test_cpu/test_block_names.py +++ b/test/test_cpu/test_block_names.py @@ -2,13 +2,16 @@ import shutil import sys import unittest + sys.path.insert(0, ".") sys.path.insert(0, "../..") import torch import torch.nn as nn -from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig +from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer + from auto_round import AutoRound + class LLMDataLoader: def __init__(self, input_size=10): self.batch_size = 1 @@ -175,8 +178,9 @@ def test_block_name_quant(self): def test_mm_block_name(self): - from auto_round.utils import get_block_names from transformers import Qwen2VLForConditionalGeneration + + from auto_round.utils import get_block_names model_name = "Qwen/Qwen2-VL-2B-Instruct" model = 
Qwen2VLForConditionalGeneration.from_pretrained( model_name, trust_remote_code=True, device_map="auto") diff --git a/test/test_cpu/test_conv1d.py b/test/test_cpu/test_conv1d.py index 8e87170a2..0009bcedf 100644 --- a/test/test_cpu/test_conv1d.py +++ b/test/test_cpu/test_conv1d.py @@ -5,10 +5,12 @@ sys.path.insert(0, "../..") import torch +from _test_helpers import model_infer from transformers import AutoModelForCausalLM, AutoTokenizer from auto_round import AutoRound -from _test_helpers import model_infer + + class LLMDataLoader: def __init__(self): self.batch_size = 1 diff --git a/test/test_cpu/test_export.py b/test/test_cpu/test_export.py index 8915c548e..bfd54ba31 100644 --- a/test/test_cpu/test_export.py +++ b/test/test_cpu/test_export.py @@ -4,7 +4,7 @@ sys.path.insert(0, "../..") import torch -from transformers import AutoModelForCausalLM, AutoTokenizer, AutoRoundConfig +from transformers import AutoModelForCausalLM, AutoRoundConfig, AutoTokenizer from auto_round import AutoRound @@ -206,6 +206,7 @@ def test_autoround_3bit_sym_format(self): def test_static_afp8_export(self): import os + from safetensors import safe_open model_name = "facebook/opt-125m" diff --git a/test/test_cpu/test_generation.py b/test/test_cpu/test_generation.py index cec33e991..11c9ca1bb 100644 --- a/test/test_cpu/test_generation.py +++ b/test/test_cpu/test_generation.py @@ -5,7 +5,7 @@ sys.path.insert(0, "../..") import torch -from transformers import AutoModelForCausalLM, AutoTokenizer, AutoRoundConfig +from transformers import AutoModelForCausalLM, AutoRoundConfig, AutoTokenizer from auto_round import AutoRound diff --git a/test/test_cpu/test_gguf_format.py b/test/test_cpu/test_gguf_format.py index ae5cfba5a..cb7691a1b 100644 --- a/test/test_cpu/test_gguf_format.py +++ b/test/test_cpu/test_gguf_format.py @@ -1,7 +1,8 @@ import os +import shutil import sys import unittest -import shutil + sys.path.insert(0, "../..") import torch diff --git a/test/test_cpu/test_hpu.py b/test/test_cpu/test_hpu.py index 629a93212..035060817 100644 --- a/test/test_cpu/test_hpu.py +++ b/test/test_cpu/test_hpu.py @@ -9,7 +9,6 @@ from auto_round import AutoRound - class LLMDataLoader: def __init__(self): self.batch_size = 1 @@ -20,7 +19,7 @@ def __iter__(self): def is_hpu_supported(): try: - import habana_frameworks.torch.core as htcore # pylint: disable=E0401 + import habana_frameworks.torch.core as htcore # pylint: disable=E0401 except ImportError as e: return False return True diff --git a/test/test_cpu/test_llmcompressor_w8a8.py b/test/test_cpu/test_llmcompressor_w8a8.py index 7156b6a07..978a8ed5c 100644 --- a/test/test_cpu/test_llmcompressor_w8a8.py +++ b/test/test_cpu/test_llmcompressor_w8a8.py @@ -1,7 +1,8 @@ import os +import shutil import sys import unittest -import shutil + sys.path.insert(0, "../..") import torch diff --git a/test/test_cpu/test_load_awq_gptq.py b/test/test_cpu/test_load_awq_gptq.py index 202605491..88e9730bc 100644 --- a/test/test_cpu/test_load_awq_gptq.py +++ b/test/test_cpu/test_load_awq_gptq.py @@ -4,9 +4,7 @@ sys.path.insert(0, "../..") -from transformers import AutoModelForCausalLM, AutoTokenizer - -from transformers import AutoRoundConfig +from transformers import AutoModelForCausalLM, AutoRoundConfig, AutoTokenizer class TestAutoRound(unittest.TestCase): diff --git a/test/test_cpu/test_low_cpu_mem.py b/test/test_cpu/test_low_cpu_mem.py index 2c4378f07..5ad8f5659 100644 --- a/test/test_cpu/test_low_cpu_mem.py +++ b/test/test_cpu/test_low_cpu_mem.py @@ -1,20 +1,21 @@ +import os import shutil import 
sys -import os import unittest + sys.path.insert(0, "../..") import torch from transformers import AutoModelForCausalLM, AutoTokenizer + +from auto_round import AutoRound from auto_round.low_cpu_mem.utils import ( - load_model_with_hooks, - load_empty_model, get_layers_before_block, layer_wise_load, layer_wise_save, - ) - -from auto_round import AutoRound + load_empty_model, + load_model_with_hooks, +) class LLMDataLoader: diff --git a/test/test_cpu/test_mllm.py b/test/test_cpu/test_mllm.py index 5d5426b9f..4206847f0 100644 --- a/test/test_cpu/test_mllm.py +++ b/test/test_cpu/test_mllm.py @@ -3,11 +3,11 @@ sys.path.insert(0, "../..") -from auto_round import AutoRoundMLLM +import shutil from transformers import AutoProcessor, AutoTokenizer, Qwen2VLForConditionalGeneration -import shutil +from auto_round import AutoRoundMLLM class FakeDataLoader: @@ -73,7 +73,7 @@ def test_quant_vision(self): ## bug need to fix autoround.save_quantized("./saved/", format="auto_round", inplace=True) def test_quant_block_names(self): - from auto_round.utils import get_block_names,find_matching_blocks + from auto_round.utils import find_matching_blocks, get_block_names tokenizer = AutoTokenizer.from_pretrained(self.model_name) processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True) model = Qwen2VLForConditionalGeneration.from_pretrained( @@ -109,6 +109,7 @@ def test_diff_dataset(self): def test_pure_text_model_check(self): from transformers import AutoModelForCausalLM + from auto_round.utils import is_pure_text_model model = Qwen2VLForConditionalGeneration.from_pretrained( self.model_name, trust_remote_code=True, device_map="auto") diff --git a/test/test_cpu/test_utils.py b/test/test_cpu/test_utils.py index eff324e43..846af0036 100644 --- a/test/test_cpu/test_utils.py +++ b/test/test_cpu/test_utils.py @@ -1,8 +1,10 @@ -from unittest.mock import patch import sys +from unittest.mock import patch + sys.path.insert(0, "../..") import auto_round.utils as auto_round_utils + class TestPackingWithNumba: @patch.object(auto_round_utils, "_is_tbb_installed", lambda: False) diff --git a/test/test_cpu/test_woq_linear.py b/test/test_cpu/test_woq_linear.py index 1f48e2309..673b5d06d 100644 --- a/test/test_cpu/test_woq_linear.py +++ b/test/test_cpu/test_woq_linear.py @@ -1,6 +1,8 @@ +import sys + import pytest import torch -import sys + sys.path.insert(0, "../..") from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear diff --git a/test/test_cuda/test_2_3bits.py b/test/test_cuda/test_2_3bits.py index 73f73d8f2..519d4e6de 100644 --- a/test/test_cuda/test_2_3bits.py +++ b/test/test_cuda/test_2_3bits.py @@ -1,17 +1,17 @@ import copy +import re import shutil import sys import unittest -import re sys.path.insert(0, "../..") import torch import transformers +from lm_eval.utils import make_table # pylint: disable=E0401 from transformers import AutoModelForCausalLM, AutoTokenizer from auto_round import AutoRound from auto_round.eval.evaluation import simple_evaluate -from lm_eval.utils import make_table # pylint: disable=E0401 from auto_round.testing_utils import require_autogptq, require_greater_than_050, require_greater_than_051 diff --git a/test/test_cuda/test_auto_round_format.py b/test/test_cuda/test_auto_round_format.py index 817b5087d..22c963a48 100644 --- a/test/test_cuda/test_auto_round_format.py +++ b/test/test_cuda/test_auto_round_format.py @@ -4,15 +4,13 @@ import unittest sys.path.insert(0, "../..") -from auto_round.eval.evaluation import simple_evaluate_user_model -from 
auto_round.testing_utils import require_greater_than_050, require_autogptq, require_awq, require_ipex - import torch import transformers from transformers import AutoModelForCausalLM, AutoTokenizer -from auto_round import AutoRound -from auto_round import AutoRoundConfig +from auto_round import AutoRound, AutoRoundConfig +from auto_round.eval.evaluation import simple_evaluate_user_model +from auto_round.testing_utils import require_autogptq, require_awq, require_greater_than_050, require_ipex class LLMDataLoader: diff --git a/test/test_cuda/test_conv1d.py b/test/test_cuda/test_conv1d.py index bf0daba34..b8fa0aa43 100644 --- a/test/test_cuda/test_conv1d.py +++ b/test/test_cuda/test_conv1d.py @@ -5,11 +5,13 @@ sys.path.insert(0, "../..") import torch +from _test_helpers import model_infer from transformers import AutoModelForCausalLM, AutoTokenizer from auto_round import AutoRound from auto_round.testing_utils import require_gptqmodel -from _test_helpers import model_infer + + class LLMDataLoader: def __init__(self): self.batch_size = 1 @@ -35,7 +37,7 @@ def tearDownClass(self): def test_quant(self): self.model = AutoModelForCausalLM.from_pretrained(self.model_name, torch_dtype="auto", trust_remote_code=True) bits, group_size, sym = 4, 128, True - from auto_round import AutoRoundConfig + from auto_round import AutoRoundConfig autoround = AutoRound( self.model, self.tokenizer, diff --git a/test/test_cuda/test_exllamav2_backend.py b/test/test_cuda/test_exllamav2_backend.py index 2a6539306..4279d6e57 100644 --- a/test/test_cuda/test_exllamav2_backend.py +++ b/test/test_cuda/test_exllamav2_backend.py @@ -1,15 +1,16 @@ import shutil import sys import unittest + import pytest + sys.path.insert(0, "../..") import torch from transformers import AutoModelForCausalLM, AutoTokenizer -from auto_round import AutoRound -from auto_round import AutoRoundConfig +from auto_round import AutoRound, AutoRoundConfig from auto_round.eval.evaluation import simple_evaluate_user_model from auto_round.testing_utils import require_autogptq, require_gptqmodel diff --git a/test/test_cuda/test_get_block_name.py b/test/test_cuda/test_get_block_name.py index fd89e2aa1..f796970d3 100644 --- a/test/test_cuda/test_get_block_name.py +++ b/test/test_cuda/test_get_block_name.py @@ -6,10 +6,17 @@ sys.path.insert(0, "../..") import torch import transformers -from transformers import AutoModelForCausalLM, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoModelForVision2Seq, \ - Gemma3ForConditionalGeneration, Mistral3ForConditionalGeneration -from auto_round.utils import get_block_names, is_pure_text_model +from transformers import ( + AutoModelForCausalLM, + AutoModelForVision2Seq, + AutoTokenizer, + Gemma3ForConditionalGeneration, + Mistral3ForConditionalGeneration, + Qwen2VLForConditionalGeneration, +) + from auto_round import AutoRound +from auto_round.utils import get_block_names, is_pure_text_model class TestAutoRound(unittest.TestCase): @@ -33,7 +40,7 @@ def test_glm4(self): model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True) block_names = get_block_names(model) self.check_block_names(block_names, ["transformer.encoder.layers"], [40]) - assert is_pure_text_model(model) == True + assert is_pure_text_model(model), "Expected model to be pure text model" def test_opt_125m(self): model_name = "/models/opt-125m" @@ -41,56 +48,56 @@ def test_opt_125m(self): block_names = get_block_names(model) self.check_block_names(block_names, ["model.decoder.layers"], [12]) - assert 
is_pure_text_model(model) == True + assert is_pure_text_model(model) def test_Qwen(self): model_name = "/models/Qwen2.5-7B-Instruct" model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True) block_names = get_block_names(model) self.check_block_names(block_names, ["model.layers"], [28]) - assert is_pure_text_model(model) == True + assert is_pure_text_model(model) def test_phi4(self): model_name = "/models/phi-4" model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True) block_names = get_block_names(model) self.check_block_names(block_names, ["model.layers"], [40]) - assert is_pure_text_model(model) == True + assert is_pure_text_model(model) def test_llama3(self): model_name = "/models/Meta-Llama-3.1-8B-Instruct" model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True) block_names = get_block_names(model) self.check_block_names(block_names, ["model.layers"], [32]) - assert is_pure_text_model(model) == True + assert is_pure_text_model(model) def test_mixtral(self): model_name = "/models/Mixtral-8x7B-Instruct-v0.1" model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True) block_names = get_block_names(model) self.check_block_names(block_names, ["model.layers"], [32]) - assert is_pure_text_model(model) == True + assert is_pure_text_model(model) def test_falcon(self): model_name = "/models/Falcon3-7B-Instruct" model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True) block_names = get_block_names(model) self.check_block_names(block_names, ["model.layers"], [28]) - assert is_pure_text_model(model) == True + assert is_pure_text_model(model) def test_orca(self): model_name = "/models/Orca-2-7b" model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True) block_names = get_block_names(model) self.check_block_names(block_names, ["model.layers"], [32]) - assert is_pure_text_model(model) == True + assert is_pure_text_model(model) def test_OLMo(self): model_name = "/models/OLMo-2-1124-7B-Instruct" model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True) block_names = get_block_names(model) self.check_block_names(block_names, ["model.layers"], [32]) - assert is_pure_text_model(model) == True + assert is_pure_text_model(model) def test_Qwen2VL(self): model_name = "/models/Qwen2-VL-2B-Instruct" @@ -100,7 +107,7 @@ def test_Qwen2VL(self): block_names = get_block_names(model, quant_vision=True) self.check_block_names(block_names, ["visual.blocks", "model.layers"], [32, 28]) - assert is_pure_text_model(model) == False + assert not is_pure_text_model(model) def test_Llama32(self): model_name = "/models/Llama-3.2-11B-Vision-Instruct" @@ -113,7 +120,7 @@ def test_Llama32(self): ["vision_model.transformer.layers", "vision_model.global_transformer.layers", "language_model.model.layers"], [32, 8, 40]) - assert is_pure_text_model(model) == False + assert not is_pure_text_model(model) def test_SmolVLM(self): model_name = "/models/SmolVLM-Instruct" @@ -123,7 +130,7 @@ def test_SmolVLM(self): block_names = get_block_names(model, quant_vision=True) self.check_block_names(block_names, ["model.vision_model.encoder.layers", "model.text_model.layers"], [27, 24]) - assert is_pure_text_model(model) == False + assert not is_pure_text_model(model) def test_glm_4v(self): model_name = "/models/glm-4v-9b" @@ -134,7 +141,7 @@ def 
test_glm_4v(self): block_names = get_block_names(model, quant_vision=True) self.check_block_names(block_names, ["transformer.encoder.layers", "transformer.vision.transformer.layers"], [40, 63]) - assert is_pure_text_model(model) == False + assert not is_pure_text_model(model) def test_gemma3(self): model_name = "/models/gemma-3-12b-it" @@ -145,7 +152,7 @@ def test_gemma3(self): block_names = get_block_names(model, quant_vision=True) self.check_block_names(block_names, ["vision_tower.vision_model.encoder.layers", "language_model.model.layers"], [27, 48]) - assert is_pure_text_model(model) == False + assert not is_pure_text_model(model) def test_Mistral3(self): model_name = "/models/Mistral-Small-3.1-24B-Instruct-2503" @@ -156,7 +163,7 @@ def test_Mistral3(self): block_names = get_block_names(model, quant_vision=True) self.check_block_names(block_names, ["vision_tower.transformer.layers", "language_model.model.layers"], [24, 40]) - assert is_pure_text_model(model) == False + assert not is_pure_text_model(model) def test_Molmo(self): model_name = "/models/Molmo-7B-D-0924" @@ -168,7 +175,7 @@ def test_Molmo(self): self.check_block_names(block_names, ["model.transformer.blocks", "model.vision_backbone.image_vit.transformer.resblocks"], [28, 23]) - assert is_pure_text_model(model) == False + assert not is_pure_text_model(model) if __name__ == "__main__": diff --git a/test/test_cuda/test_gguf.py b/test/test_cuda/test_gguf.py index ece1c8c1a..6faf31eb4 100644 --- a/test/test_cuda/test_gguf.py +++ b/test/test_cuda/test_gguf.py @@ -11,6 +11,7 @@ from auto_round import AutoRound from auto_round.testing_utils import require_gguf + class LLMDataLoader: def __init__(self): self.batch_size = 1 diff --git a/test/test_cuda/test_main_func.py b/test/test_cuda/test_main_func.py index 7f520cb38..d32faf92a 100644 --- a/test/test_cuda/test_main_func.py +++ b/test/test_cuda/test_main_func.py @@ -1,18 +1,18 @@ import copy +import re import shutil import sys import unittest -import re sys.path.insert(0, "../..") import torch import transformers +from lm_eval.utils import make_table # pylint: disable=E0401 from transformers import AutoModelForCausalLM, AutoTokenizer from auto_round import AutoRound, AutoRoundAdam from auto_round.eval.evaluation import simple_evaluate -from auto_round.testing_utils import require_gptqmodel, require_optimum, require_awq -from lm_eval.utils import make_table # pylint: disable=E0401 +from auto_round.testing_utils import require_awq, require_gptqmodel, require_optimum def get_accuracy(data): diff --git a/test/test_cuda/test_marlin_backend.py b/test/test_cuda/test_marlin_backend.py index cb3516cad..f398029c2 100644 --- a/test/test_cuda/test_marlin_backend.py +++ b/test/test_cuda/test_marlin_backend.py @@ -1,14 +1,14 @@ import shutil import sys import unittest + import pytest sys.path.insert(0, "../..") import torch from transformers import AutoModelForCausalLM, AutoTokenizer -from auto_round import AutoRound -from auto_round import AutoRoundConfig +from auto_round import AutoRound, AutoRoundConfig from auto_round.eval.evaluation import simple_evaluate_user_model diff --git a/test/test_cuda/test_multiple_card.py b/test/test_cuda/test_multiple_card.py index 5f8ab73d7..1710c1660 100644 --- a/test/test_cuda/test_multiple_card.py +++ b/test/test_cuda/test_multiple_card.py @@ -1,16 +1,18 @@ import re +import shutil import sys import unittest -import shutil + sys.path.insert(0, "../..") import torch from lm_eval.utils import make_table # pylint: disable=E0401 from transformers import 
AutoModelForCausalLM, AutoTokenizer + from auto_round import AutoRound from auto_round.eval.evaluation import simple_evaluate -from auto_round.testing_utils import multi_card, require_greater_than_050, require_gptqmodel +from auto_round.testing_utils import multi_card, require_gptqmodel, require_greater_than_050 def get_accuracy(data): @@ -105,7 +107,6 @@ def test_device_map(self): autoround = AutoRound(model, tokenizer, iters=2, device_map=device_map, nsamples=7,seqlen=32) autoround.quantize() - from transformers import AutoModelForCausalLM, AutoTokenizer model_name = "OPEA/Meta-Llama-3.1-8B-Instruct-int4-sym-inc" device_map = {} diff --git a/test/test_cuda/test_multiple_card_calib.py b/test/test_cuda/test_multiple_card_calib.py index 63a494f75..8f3b584bc 100644 --- a/test/test_cuda/test_multiple_card_calib.py +++ b/test/test_cuda/test_multiple_card_calib.py @@ -1,13 +1,14 @@ import os import re -import sys import shutil +import sys import unittest sys.path.insert(0, "../..") from auto_round.testing_utils import multi_card + def get_accuracy(data): match = re.search(r'\|acc\s+\|[↑↓]\s+\|\s+([\d.]+)\|', data) diff --git a/test/test_cuda/test_qbits.py b/test/test_cuda/test_qbits.py index 8b79d97b5..4af9fb358 100644 --- a/test/test_cuda/test_qbits.py +++ b/test/test_cuda/test_qbits.py @@ -6,8 +6,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer -from auto_round import AutoRoundConfig, AutoRound -from auto_round.testing_utils import require_itrex, require_gptqmodel +from auto_round import AutoRound, AutoRoundConfig +from auto_round.testing_utils import require_gptqmodel, require_itrex class TestAutoRound(unittest.TestCase): @@ -78,8 +78,9 @@ def test_mixed_precision(self): layer_config["model.decoder.layers.0.self_attn.k_proj"] = {"bits": 8} layer_config["model.decoder.layers.6.self_attn.out_proj"] = {"bits": 2, "group_size": 32} bits, group_size, sym = 4, 128, True - from auto_round import AutoRound import torch + + from auto_round import AutoRound autoround = AutoRound( model, tokenizer, diff --git a/test/test_cuda/test_support_vlms.py b/test/test_cuda/test_support_vlms.py index ddca458aa..8208ae255 100644 --- a/test/test_cuda/test_support_vlms.py +++ b/test/test_cuda/test_support_vlms.py @@ -1,15 +1,16 @@ import os -import sys import shutil +import sys import unittest sys.path.insert(0, '../..') -from auto_round import AutoRoundConfig ## must import for auto-round format -from auto_round.testing_utils import require_gptqmodel, require_vlm_env import requests from PIL import Image +from auto_round import AutoRoundConfig # # must import for auto-round format +from auto_round.testing_utils import require_gptqmodel, require_vlm_env + class TestSupportVLMS(unittest.TestCase): @classmethod @@ -352,7 +353,7 @@ def test_deepseek_vl2(self): self.assertFalse(res > 0 or res == -1, msg="deepseek vl2 tuning fail") quantized_model_path = os.path.join(self.save_dir, "deepseek-vl2-tiny-w4g32") - from deepseek_vl2.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM + from deepseek_vl2.models import DeepseekVLV2ForCausalLM, DeepseekVLV2Processor from transformers import AutoModelForCausalLM vl_chat_processor: DeepseekVLV2Processor = DeepseekVLV2Processor.from_pretrained(quantized_model_path) tokenizer = vl_chat_processor.tokenizer diff --git a/test/test_cuda/test_transformers.py b/test/test_cuda/test_transformers.py index 527f5aec9..e429f14d4 100644 --- a/test/test_cuda/test_transformers.py +++ b/test/test_cuda/test_transformers.py @@ -26,7 +26,6 @@ ) from transformers.utils import 
is_torch_available - if is_torch_available(): import torch diff --git a/test/test_cuda/test_triton_backend.py b/test/test_cuda/test_triton_backend.py index a0d69322f..e31b659dd 100644 --- a/test/test_cuda/test_triton_backend.py +++ b/test/test_cuda/test_triton_backend.py @@ -4,12 +4,11 @@ import unittest sys.path.insert(0, "../..") -from auto_round.eval.evaluation import simple_evaluate_user_model - import torch from transformers import AutoModelForCausalLM, AutoTokenizer from auto_round import AutoRound, AutoRoundConfig +from auto_round.eval.evaluation import simple_evaluate_user_model from auto_round.testing_utils import require_greater_than_050 diff --git a/test/test_cuda/test_vlms.py b/test/test_cuda/test_vlms.py index 15ad6ca1c..62444d786 100644 --- a/test/test_cuda/test_vlms.py +++ b/test/test_cuda/test_vlms.py @@ -1,16 +1,18 @@ -import re -import os -import sys import copy +import os +import re import shutil +import sys import unittest + import requests sys.path.insert(0, "../..") from PIL import Image + from auto_round import AutoRoundConfig -from auto_round.testing_utils import require_gptqmodel, require_vlm_env, require_optimum +from auto_round.testing_utils import require_gptqmodel, require_optimum, require_vlm_env class TestAutoRound(unittest.TestCase): @@ -42,8 +44,7 @@ def tearDownClass(self): # res == """ There is a girl who likes adventure, and she is looking for a partner to go on a treasure hunt. She has found a map that leads to a hidden treasure, but she needs a partner to help her decipher the clues and find the treasure. You""") def qwen_inference(self, quantized_model_dir): - import requests - from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoTokenizer + from transformers import AutoProcessor, AutoTokenizer, Qwen2VLForConditionalGeneration tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir) processor = AutoProcessor.from_pretrained(quantized_model_dir, trust_remote_code=True) model = Qwen2VLForConditionalGeneration.from_pretrained( @@ -92,8 +93,9 @@ def qwen_inference(self, quantized_model_dir): @require_gptqmodel @require_optimum def test_vlm_tune(self): + from transformers import AutoProcessor, AutoTokenizer, Qwen2VLForConditionalGeneration + from auto_round import AutoRoundMLLM - from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoTokenizer ## load the model model_name = "/models/Qwen2-VL-2B-Instruct" diff --git a/test/test_xpu/test_autoround.py b/test/test_xpu/test_autoround.py index caad16634..0c547b236 100644 --- a/test/test_xpu/test_autoround.py +++ b/test/test_xpu/test_autoround.py @@ -8,8 +8,8 @@ import transformers from transformers import AutoModelForCausalLM, AutoTokenizer -from auto_round import AutoRoundConfig -from auto_round import AutoRound +from auto_round import AutoRound, AutoRoundConfig + class LLMDataLoader: def __init__(self): @@ -52,7 +52,6 @@ def test_gptq_format(self): quantized_model_path = "./saved" autoround.quantize_and_save(output_dir=quantized_model_path) - from auto_round import AutoRoundConfig quantization_config = AutoRoundConfig( backend="auto" ) @@ -88,7 +87,6 @@ def test_awq_format(self): autoround.quantize_and_save(output_dir=quantized_model_path, format="auto_round:auto_awq") quantized_model_path = "./saved" - from auto_round import AutoRoundConfig quantization_config = AutoRoundConfig( backend="auto" )
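
Note on the tooling configuration introduced above: pyproject.toml now configures isort with profile "black", line_length 120 and known_first_party = ["auto_round"], and enables ruff (line-length 120, target py310, rule sets E4/E7/E9/F/NPY/FURB). That configuration is what drives the import reordering visible throughout the test diffs. The sketch below is illustrative only; the file name and the exact mix of modules are assumptions for the example, not part of this patch. It shows the three import groups the settings are expected to produce.

# example_imports.py - minimal sketch of the import grouping enforced by the
# isort/ruff settings added in pyproject.toml; not a file touched by this patch.
import os   # 1) standard-library imports first
import sys

import torch  # 2) third-party packages second
from transformers import AutoModelForCausalLM, AutoTokenizer

from auto_round import AutoRound  # 3) first-party imports last (known_first_party)
from auto_round.eval.evaluation import simple_evaluate_user_model

Running `isort .` followed by `ruff check --fix .` from the repository root should reproduce this grouping, assuming both tools are installed at versions compatible with the settings above.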