
Commit 25e6762

dragondream-chen authored and dsxsteven committed

fix pre-commit

Signed-off-by: chenmenglong <[email protected]>

1 parent c7cea53 · commit 25e6762

File tree

3 files changed: +37 -35 lines changed

vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/modular_kernel.py
vllm/model_executor/layers/quantization/fp8.py

vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 13 additions & 10 deletions
@@ -636,7 +636,8 @@ def forward_cuda(
             expert_map=expert_map,
             expert_load_view=expert_load_view,
             logical_to_physical_map=logical_to_physical_map,
-            logical_replica_count=logical_replica_count)
+            logical_replica_count=logical_replica_count,
+            fused_experts_method=self.fused_experts)
 
         if self.rocm_aiter_moe_enabled:
             assert self.fused_experts is None
@@ -2056,19 +2057,20 @@ def select_experts(
         # 2. Record expert load metrics.
 
         # When using FusedMoEModularKernel,
-        # expert load statistics are handled directly in the kernel using
+        # expert load statistics are handled directly in the kernel using
         # ExpertTokensMetadata.expert_num_tokens for better performance.
-        # For other implementations or when metadata is not available,
+        # For other implementations or when metadata is not available,
         # we fall back to here.
 
-        # There is no expert_num_tokens in
+        # There is no expert_num_tokens in
         # expert_tokens_meta of DeepEPHTPrepareAndFinalize
-        # so it is not supported DeepEPHTPrepareAndFinalize for now.
+        # so it is not supported DeepEPHTPrepareAndFinalize for now.
         # TODO: Maybe it is better to support DeepEPHTPrepareAndFinalize.
-        skip_expert_load_scatter_add = ((fused_experts_method is not None) and
-                                        isinstance(fused_experts_method, FusedMoEModularKernel) and
-                                        (fused_experts_method.prepare_finalize.__class__ !=
-                                         "DeepEPHTPrepareAndFinalize"))
+        skip_expert_load_scatter_add = (
+            (fused_experts_method is not None)
+            and isinstance(fused_experts_method, FusedMoEModularKernel)
+            and (fused_experts_method.prepare_finalize.__class__
+                 != "DeepEPHTPrepareAndFinalize"))
 
         if not skip_expert_load_scatter_add:
             logger.debug("expert_load_view update from topk_ids.")
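
The condition reformatted above is the crux of this hunk: the scatter_add_ fallback in select_experts is skipped only when a FusedMoEModularKernel will record expert load itself, and the DeepEP high-throughput path is excluded because its metadata lacks expert_num_tokens. A minimal standalone sketch of that decision, using stand-in classes rather than vLLM's real types, and comparing the prepare/finalize type by name, which appears to be the intent of the __class__ comparison against a string:

# Minimal sketch of the skip decision; the classes below are
# stand-ins for vLLM's real types, not the actual implementations.
class FusedMoEModularKernel:
    def __init__(self, prepare_finalize):
        self.prepare_finalize = prepare_finalize

class DeepEPHTPrepareAndFinalize:
    # Its expert_tokens_meta has no expert_num_tokens, so the kernel
    # cannot record expert load itself.
    pass

class OtherPrepareAndFinalize:  # hypothetical path with metadata
    pass

def should_skip_scatter_add(fused_experts_method) -> bool:
    # Skip the fallback only when a modular kernel records expert load
    # from ExpertTokensMetadata.expert_num_tokens, i.e. any
    # prepare/finalize except the DeepEP high-throughput one.
    return (fused_experts_method is not None
            and isinstance(fused_experts_method, FusedMoEModularKernel)
            and type(fused_experts_method.prepare_finalize).__name__
            != "DeepEPHTPrepareAndFinalize")

assert not should_skip_scatter_add(None)
assert not should_skip_scatter_add(
    FusedMoEModularKernel(DeepEPHTPrepareAndFinalize()))
assert should_skip_scatter_add(
    FusedMoEModularKernel(OtherPrepareAndFinalize()))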
@@ -2080,7 +2082,7 @@ def select_experts(
             # Replace invalid expert ids with 0 (just a dummy position)
             # to avoid out-of-bounds errors in scatter_add_
             index = topk_ids_flatten.masked_fill_(invalid_mask, 0)
-            # `src` is the valid mask,
+            # `src` is the valid mask,
             # which is 1 for valid and 0 for invalid
             src = ~invalid_mask
 
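The surrounding context shows the fallback itself: per-expert token counts accumulated with scatter_add_, after clamping invalid expert ids to a dummy index with zero weight. A self-contained sketch under the assumption that negative ids mark invalid entries; shapes and values are illustrative:

import torch

# Sketch of the fallback load accounting: count tokens routed to each
# expert while ignoring invalid (negative) expert ids. The shapes and
# the "-1 means invalid" convention are assumptions for illustration.
num_experts = 4
topk_ids = torch.tensor([[0, 2], [3, -1], [2, 2]])  # (num_tokens, top_k)

topk_ids_flatten = topk_ids.flatten()
invalid_mask = topk_ids_flatten < 0

# Replace invalid ids with 0 (just a dummy position) so scatter_add_
# stays in bounds; src is 0 there, so expert 0 is not over-counted.
index = topk_ids_flatten.masked_fill(invalid_mask, 0)
# `src` is the valid mask: 1 for valid and 0 for invalid.
src = (~invalid_mask).long()

expert_load_view = torch.zeros(num_experts, dtype=torch.long)
expert_load_view.scatter_add_(dim=0, index=index, src=src)
print(expert_load_view)  # tensor([1, 0, 3, 1])
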
@@ -2510,6 +2512,7 @@ def clear_expert_load_view(self):
         if self.expert_load_view is not None:
             self.expert_load_view.zero_()
 
+
 def moe_forward(
     hidden_states: torch.Tensor,
     router_logits: torch.Tensor,

vllm/model_executor/layers/fused_moe/modular_kernel.py

Lines changed: 23 additions & 23 deletions
@@ -1243,29 +1243,29 @@ def forward(
          _expert_topk_weights) = receiver()
 
         # In EPLB, update expert load from expert_num_tokens.
-        if (expert_tokens_meta is not None and expert_load_view is not None and
-                expert_tokens_meta.expert_num_tokens is not None and
-                expert_map is not None):
-            # Initialize the mapping of the local physical experts
-            # to global physical experts, after which it will not change.
-            # `expert_load_view`: (num_physical_experts,)
-            # `expert_num_tokens`: (local_num_physical_experts,)
-            if self.expert_map is None:
-                self.expert_map = expert_map.clone()
-                self.local_to_global_physical_experts = \
-                    torch.nonzero(expert_map != -1,
-                                  as_tuple=False).squeeze()
-            else:
-                if not torch.equal(self.expert_map, expert_map):
-                    self.expert_map = expert_map.clone()
-                    self.local_to_global_physical_experts = \
-                        torch.nonzero(expert_map != -1,
-                                      as_tuple=False).squeeze()
-
-            # Use pre-computed expert token counts from metadata
-            expert_load_view.scatter_add_(dim=0,
-                index=self.local_to_global_physical_experts,
-                src=expert_tokens_meta.expert_num_tokens)
+        if (expert_tokens_meta is not None and expert_load_view is not None
+                and expert_tokens_meta.expert_num_tokens is not None
+                and expert_map is not None):
+            # Initialize the mapping of the local physical experts
+            # to global physical experts, after which it will not change.
+            # `expert_load_view`: (num_physical_experts,)
+            # `expert_num_tokens`: (local_num_physical_experts,)
+            if self.expert_map is None:
+                self.expert_map = expert_map.clone()
+                self.local_to_global_physical_experts = \
+                    torch.nonzero(expert_map != -1,
+                                  as_tuple=False).squeeze()
+            else:
+                if not torch.equal(self.expert_map, expert_map):
+                    self.expert_map = expert_map.clone()
+                    self.local_to_global_physical_experts = \
+                        torch.nonzero(expert_map != -1,
+                                      as_tuple=False).squeeze()
+            # Use pre-computed expert token counts from metadata
+            expert_load_view.scatter_add_(
+                dim=0,
+                index=self.local_to_global_physical_experts,
+                src=expert_tokens_meta.expert_num_tokens)
 
         # Maybe prepare gathered topk_ids and topk_weights from other EP ranks.
         topk_ids = topk_ids if _expert_topk_ids is None else _expert_topk_ids
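
For the fast path reflowed above, a hedged standalone sketch of the mapping: expert_map holds each global physical expert's local id on this rank (or -1 if it lives elsewhere), torch.nonzero recovers the global indices of the rank-local experts, and the kernel's pre-computed per-local-expert token counts are scattered into the global load view. Sizes and the example mapping are made up for illustration:

import torch

# Sketch of the EPLB fast path: fold per-rank (local) expert token
# counts into the global expert_load_view. Sizes and the mapping here
# are illustrative assumptions.
num_physical_experts = 8
# Local id of each global physical expert; -1 = not on this rank.
expert_map = torch.tensor([-1, -1, 0, 1, 2, -1, -1, -1])

# Global indices of this rank's local experts. In the kernel this is
# computed once and cached until expert_map changes.
local_to_global_physical_experts = torch.nonzero(
    expert_map != -1, as_tuple=False).squeeze()

# Pre-computed token counts per local expert, standing in for
# ExpertTokensMetadata.expert_num_tokens.
expert_num_tokens = torch.tensor([5, 0, 7])

expert_load_view = torch.zeros(num_physical_experts, dtype=torch.long)
expert_load_view.scatter_add_(dim=0,
                              index=local_to_global_physical_experts,
                              src=expert_num_tokens)
print(expert_load_view)  # tensor([0, 0, 5, 0, 7, 0, 0, 0])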

vllm/model_executor/layers/quantization/fp8.py

Lines changed: 1 addition & 2 deletions
@@ -1231,8 +1231,7 @@ def apply(
             expert_load_view=expert_load_view,
             logical_to_physical_map=logical_to_physical_map,
             logical_replica_count=logical_replica_count,
-            fused_experts_method=self.fused_experts
-        )
+            fused_experts_method=self.fused_experts)
 
         if self.rocm_aiter_moe_enabled:
             from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (  # noqa: E501
