Commit c04cb12

fix more lint errors
Signed-off-by: Bill Nell <[email protected]>
1 parent 916f902

3 files changed: +27 -30 lines changed
vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 5 additions & 4 deletions
@@ -2,11 +2,11 @@
 
 import importlib
 import threading
-import weakref
 from abc import abstractmethod
 from dataclasses import dataclass
 from enum import Enum
 from typing import Callable, List, Optional, Tuple
+from weakref import WeakValueDictionary
 
 import torch
 import torch.nn.functional as F
@@ -266,7 +266,7 @@ def apply(
 class AllToAllCache:
 
     def __init__(self):
-        self._cache = weakref.WeakValueDictionary()
+        self._cache: WeakValueDictionary = WeakValueDictionary()
         self._lock = threading.RLock()  # Reentrant lock for thread safety
 
     def get_or_create(self, **kwargs):
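For context, `AllToAllCache` hands out shared handles keyed by their construction arguments while holding them only weakly, so unused entries can be garbage-collected rather than leak. The annotation is very likely the lint fix itself: mypy cannot infer the type parameters of an empty generic container, so a bare `WeakValueDictionary()` typically triggers a "Need type annotation" error. The commit shows only the `get_or_create` signature; the sketch below fills in an assumed body for the pattern, with `AllToAll` as a hypothetical stand-in for the cached resource.

    import threading
    from weakref import WeakValueDictionary

    class AllToAll:  # hypothetical stand-in for the cached resource
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class AllToAllCache:
        def __init__(self):
            # Weak values: an entry vanishes once no caller references it.
            self._cache: WeakValueDictionary = WeakValueDictionary()
            self._lock = threading.RLock()  # Reentrant lock for thread safety

        def get_or_create(self, **kwargs):
            # Assumed body: derive a hashable key from the kwargs, then
            # return the cached instance or construct one under the lock.
            key = tuple(sorted(kwargs.items()))
            with self._lock:
                instance = self._cache.get(key)
                if instance is None:
                    instance = AllToAll(**kwargs)
                    self._cache[key] = instance
                return instance

A plain `Lock` would deadlock if constructing one handle ever re-entered the cache on the same thread; the reentrant `RLock` avoids that.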
@@ -802,7 +802,8 @@ def __init__(
         if quant_config is None:
             quant_method = UnquantizedFusedMoEMethod(moe)
         else:
-            quant_method = quant_config.get_quant_method(self, prefix)
+            quant_method = quant_config.get_quant_method(
+                self, prefix)  # type: ignore
         assert isinstance(quant_method, FusedMoEMethodBase)
 
         assert quant_method is not None
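The `# type: ignore` suppresses a single mypy diagnostic on that call. A minimal, hypothetical reproduction of the kind of mismatch it covers, assuming `get_quant_method` is annotated against a different layer base class than `FusedMoE` (the real signatures may differ):

    from typing import Optional

    class QuantizeMethodBase: ...

    class LinearBase: ...

    class QuantizationConfig:
        # Assumed signature, for illustration only.
        def get_quant_method(self, layer: LinearBase,
                             prefix: str) -> Optional[QuantizeMethodBase]:
            return QuantizeMethodBase()

    class FusedMoE:
        def __init__(self, quant_config: QuantizationConfig, prefix: str):
            # FusedMoE is not a LinearBase, so mypy reports an arg-type
            # error here; the ignore comment silences exactly this line.
            quant_method = quant_config.get_quant_method(
                self, prefix)  # type: ignore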
@@ -812,7 +813,7 @@ def __init__(
 
         if dispatch_combine is not None:
             world_size = moe.ep_size
-            dp_size = moe.ep_size // moe.dp_size
+            dp_size = int(moe.ep_size // moe.dp_size)
             success = self.quant_method.set_dispatch_combine(
                 dp_size, world_size, dispatch_combine)
             if not success:
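The `int(...)` wrapper is for the type checker rather than the runtime: `//` already returns an `int` when both operands are `int`, but if either field is typed more loosely, mypy infers `float` for the result. A small illustration (the `MoEConfig` fields here are assumptions, not the real config):

    from dataclasses import dataclass

    @dataclass
    class MoEConfig:  # hypothetical stand-in for the real config object
        ep_size: int
        dp_size: float  # one float operand makes `//` infer float

    moe = MoEConfig(ep_size=8, dp_size=2.0)
    dp_size: int = int(moe.ep_size // moe.dp_size)  # cast satisfies mypy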

vllm/model_executor/layers/fused_moe/modular_kernel.py

Lines changed: 18 additions & 21 deletions
@@ -339,27 +339,24 @@ def forward(
             a1, a1_scale, a2_scale, topk_weights, topk_ids, global_num_experts,
             expert_map, apply_router_weight_on_input)
 
-        if True:
-            fused_out = self.fused_experts.apply(
-                a1q,
-                w1,
-                w2,
-                topk_ids,
-                activation=activation,
-                global_num_experts=global_num_experts,
-                expert_map=expert_map,
-                w1_scale=w1_scale,
-                w2_scale=w2_scale,
-                w1_zp=w1_zp,
-                w2_zp=w2_zp,
-                a1q_scale=a1q_scale,
-                a2_scale=a2_scale,
-                workspace13=workspace13,
-                workspace2=workspace2,
-                expert_num_tokens=expert_num_tokens,
-            )
-        else:
-            fused_out = torch.empty_like(a1q)
+        fused_out = self.fused_experts.apply(
+            a1q,
+            w1,
+            w2,
+            topk_ids,
+            activation=activation,
+            global_num_experts=global_num_experts,
+            expert_map=expert_map,
+            w1_scale=w1_scale,
+            w2_scale=w2_scale,
+            w1_zp=w1_zp,
+            w2_zp=w2_zp,
+            a1q_scale=a1q_scale,
+            a2_scale=a2_scale,
+            workspace13=workspace13,
+            workspace2=workspace2,
+            expert_num_tokens=expert_num_tokens,
+        )
 
         self.dispatch_combine.combine(output, fused_out, topk_weights,
                                       topk_ids, apply_router_weight_on_input)

vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py

Lines changed: 4 additions & 5 deletions
@@ -21,11 +21,10 @@ def __init__(self,
                  block_m: Optional[int] = None,
                  allow_deep_gemm: bool = False):
         super().__init__()
-        self.triton_expert = TritonExperts(use_fp8_w8a8, use_int8_w8a8,
-                                           use_int4_w4a16, use_int8_w8a16,
-                                           per_channel_quant, block_shape,
-                                           block_m)
-        self.deep_gemm_expert = DeepGemmExperts()
+        self.triton_expert: TritonExperts = TritonExperts(
+            use_fp8_w8a8, use_int8_w8a8, use_int4_w4a16, use_int8_w8a16,
+            per_channel_quant, block_shape, block_m)
+        self.deep_gemm_expert: DeepGemmExperts = DeepGemmExperts()
         self.allow_deep_gemm = allow_deep_gemm
         self.use_fp8_w8a8 = use_fp8_w8a8
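As with `_cache` above, the explicit `: TritonExperts` and `: DeepGemmExperts` annotations declare the attribute types up front. This is the usual remedy when mypy cannot infer a useful type from the right-hand side, for example when the constructor comes from an untyped or conditionally imported module. A minimal illustration with hypothetical names:

    from typing import Any

    def build_expert() -> Any:
        # Stands in for a constructor mypy sees as untyped (returns Any).
        return object()

    class ExpertBase:
        def apply(self) -> None: ...

    class Wrapper:
        def __init__(self) -> None:
            # Annotated: the attribute is ExpertBase, so misuse is caught.
            # Unannotated, it would be Any and every use would go unchecked.
            self.expert: ExpertBase = build_expert()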
