Commit 1a35968

fix pre-commit
Signed-off-by: chenmenglong <[email protected]>
1 parent 2236454 commit 1a35968

3 files changed: +36 −36 lines changed


vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 12 additions & 11 deletions
@@ -487,8 +487,7 @@ def forward_cuda(
             expert_load_view=expert_load_view,
             logical_to_physical_map=logical_to_physical_map,
             logical_replica_count=logical_replica_count,
-            fused_experts_method=self.fused_experts
-        )
+            fused_experts_method=self.fused_experts)
 
         if self.rocm_aiter_moe_enabled:
             return self.rocm_aiter_fused_experts(
@@ -1536,19 +1535,20 @@ def select_experts(
         # 2. Record expert load metrics.
 
         # When using FusedMoEModularKernel,
-        # expert load statistics are handled directly in the kernel using
+        # expert load statistics are handled directly in the kernel using
         # ExpertTokensMetadata.expert_num_tokens for better performance.
-        # For other implementations or when metadata is not available,
+        # For other implementations or when metadata is not available,
         # we fall back to here.
 
-        # There is no expert_num_tokens in
+        # There is no expert_num_tokens in
         # expert_tokens_meta of DeepEPHTPrepareAndFinalize
-        # so it is not supported DeepEPHTPrepareAndFinalize for now.
+        # so it is not supported DeepEPHTPrepareAndFinalize for now.
         # TODO: Maybe it is better to support DeepEPHTPrepareAndFinalize.
-        skip_expert_load_scatter_add = ((fused_experts_method is not None) and
-                                        isinstance(fused_experts_method, FusedMoEModularKernel) and
-                                        (fused_experts_method.prepare_finalize.__class__ !=
-                                         "DeepEPHTPrepareAndFinalize"))
+        skip_expert_load_scatter_add = (
+            (fused_experts_method is not None)
+            and isinstance(fused_experts_method, FusedMoEModularKernel)
+            and (fused_experts_method.prepare_finalize.__class__
+                 != "DeepEPHTPrepareAndFinalize"))
 
         if not skip_expert_load_scatter_add:
             logger.debug("expert_load_view update from topk_ids.")
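
A side note on the predicate just reformatted: `prepare_finalize.__class__` is a type object while `"DeepEPHTPrepareAndFinalize"` is a string, so the inequality always holds as written. A name-based comparison looks like the intent; the hypothetical helper below (not part of this commit, and assuming `FusedMoEModularKernel` is importable from `modular_kernel.py` as in layer.py) spells out that reading:

# Hypothetical sketch, not code from this commit: the predicate with the
# class compared by name rather than against a string literal (an
# assumption about the intended check).
from vllm.model_executor.layers.fused_moe.modular_kernel import (
    FusedMoEModularKernel)


def should_skip_expert_load_scatter_add(fused_experts_method) -> bool:
    # Skip the topk_ids-based fallback when the modular kernel records
    # expert load itself, except DeepEPHTPrepareAndFinalize, whose
    # metadata has no expert_num_tokens.
    return (fused_experts_method is not None
            and isinstance(fused_experts_method, FusedMoEModularKernel)
            and fused_experts_method.prepare_finalize.__class__.__name__
            != "DeepEPHTPrepareAndFinalize")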
@@ -1560,7 +1560,7 @@ def select_experts(
             # Replace invalid expert ids with 0 (just a dummy position)
             # to avoid out-of-bounds errors in scatter_add_
             index = topk_ids_flatten.masked_fill_(invalid_mask, 0)
-            # `src` is the valid mask,
+            # `src` is the valid mask,
             # which is 1 for valid and 0 for invalid
             src = ~invalid_mask

@@ -1884,6 +1884,7 @@ def clear_expert_load_view(self):
         if self.expert_load_view is not None:
             self.expert_load_view.zero_()
 
+
 def moe_forward(
     hidden_states: torch.Tensor,
     router_logits: torch.Tensor,
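
For readers following the EPLB bookkeeping: when the predicate above does not skip, `select_experts` counts tokens per expert directly from `topk_ids`, masking invalid slots so the scatter stays in bounds. A self-contained sketch with illustrative shapes and values (not code from this commit):

# Minimal sketch of the fallback expert-load update, assuming -1 marks
# invalid entries in topk_ids. Names and shapes are illustrative.
import torch

num_physical_experts = 8
expert_load_view = torch.zeros(num_physical_experts, dtype=torch.long)

# Routing output: each row holds the top-k expert ids for one token.
topk_ids = torch.tensor([[0, 3], [5, -1], [3, 3]])

topk_ids_flatten = topk_ids.flatten()
invalid_mask = topk_ids_flatten < 0
# Replace invalid expert ids with 0 (just a dummy position) to avoid
# out-of-bounds errors in scatter_add_.
index = topk_ids_flatten.masked_fill(invalid_mask, 0)
# `src` is the valid mask: 1 for valid slots, 0 for invalid, so the
# dummy position at index 0 gains nothing from masked entries.
src = (~invalid_mask).to(expert_load_view.dtype)
expert_load_view.scatter_add_(dim=0, index=index, src=src)

print(expert_load_view)  # tensor([1, 0, 0, 3, 0, 1, 0, 0])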

vllm/model_executor/layers/fused_moe/modular_kernel.py

Lines changed: 23 additions & 23 deletions
@@ -849,29 +849,29 @@ def forward(
              _expert_topk_weights) = receiver()
 
         # In EPLB, update expert load from expert_num_tokens.
-        if (expert_tokens_meta is not None and expert_load_view is not None and
-                expert_tokens_meta.expert_num_tokens is not None and
-                expert_map is not None):
-            # Initialize the mapping of the local physical experts
-            # to global physical experts, after which it will not change.
-            # `expert_load_view`: (num_physical_experts,)
-            # `expert_num_tokens`: (local_num_physical_experts,)
-            if self.expert_map is None:
-                self.expert_map = expert_map.clone()
-                self.local_to_global_physical_experts = \
-                    torch.nonzero(expert_map != -1,
-                                  as_tuple=False).squeeze()
-            else:
-                if not torch.equal(self.expert_map, expert_map):
-                    self.expert_map = expert_map.clone()
-                    self.local_to_global_physical_experts = \
-                        torch.nonzero(expert_map != -1,
-                                      as_tuple=False).squeeze()
-
-            # Use pre-computed expert token counts from metadata
-            expert_load_view.scatter_add_(dim=0,
-                                          index=self.local_to_global_physical_experts,
-                                          src=expert_tokens_meta.expert_num_tokens)
+        if (expert_tokens_meta is not None and expert_load_view is not None
+                and expert_tokens_meta.expert_num_tokens is not None
+                and expert_map is not None):
+            # Initialize the mapping of the local physical experts
+            # to global physical experts, after which it will not change.
+            # `expert_load_view`: (num_physical_experts,)
+            # `expert_num_tokens`: (local_num_physical_experts,)
+            if self.expert_map is None:
+                self.expert_map = expert_map.clone()
+                self.local_to_global_physical_experts = \
+                    torch.nonzero(expert_map != -1,
+                                  as_tuple=False).squeeze()
+            else:
+                if not torch.equal(self.expert_map, expert_map):
+                    self.expert_map = expert_map.clone()
+                    self.local_to_global_physical_experts = \
+                        torch.nonzero(expert_map != -1,
+                                      as_tuple=False).squeeze()
+            # Use pre-computed expert token counts from metadata
+            expert_load_view.scatter_add_(
+                dim=0,
+                index=self.local_to_global_physical_experts,
+                src=expert_tokens_meta.expert_num_tokens)
 
         # Maybe prepare gathered topk_ids and topk_weights from other EP ranks.
         topk_ids = topk_ids if _expert_topk_ids is None else _expert_topk_ids
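
The block above is the fast path: instead of scanning `topk_ids`, it adds the kernel's pre-computed per-local-expert counts (`ExpertTokensMetadata.expert_num_tokens`) into the global load view, using `expert_map` to translate local slots to global physical expert ids. A self-contained sketch with illustrative shapes (not code from this commit):

# Minimal sketch of the metadata fast path. Names and shapes are
# illustrative; expert_map uses -1 for experts not hosted on this rank.
import torch

num_physical_experts = 8  # global count across all EP ranks
expert_map = torch.tensor([-1, -1, 0, 1, -1, 2, -1, -1])

# Global ids of the locally hosted experts: tensor([2, 3, 5]).
local_to_global_physical_experts = torch.nonzero(
    expert_map != -1, as_tuple=False).squeeze()

# Pre-computed token counts, one per local physical expert.
expert_num_tokens = torch.tensor([10, 0, 7])

# `expert_load_view`: (num_physical_experts,)
# `expert_num_tokens`: (local_num_physical_experts,)
expert_load_view = torch.zeros(num_physical_experts,
                               dtype=expert_num_tokens.dtype)
expert_load_view.scatter_add_(
    dim=0,
    index=local_to_global_physical_experts,
    src=expert_num_tokens)

print(expert_load_view)  # tensor([ 0,  0, 10,  0,  0,  7,  0,  0])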

vllm/model_executor/layers/quantization/fp8.py

Lines changed: 1 addition & 2 deletions
@@ -1054,8 +1054,7 @@ def apply(
             expert_load_view=expert_load_view,
             logical_to_physical_map=logical_to_physical_map,
             logical_replica_count=logical_replica_count,
-            fused_experts_method=self.fused_experts
-        )
+            fused_experts_method=self.fused_experts)
 
         if self.rocm_aiter_moe_enabled:
             from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (  # noqa: E501
