
Commit 25e6762

dragondream-chen authored and dsxsteven committed

fix pre-commit

Signed-off-by: chenmenglong <[email protected]>

1 parent c7cea53 · commit 25e6762

File tree

3 files changed: +37 -35 lines changed

vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/modular_kernel.py
vllm/model_executor/layers/quantization/fp8.py

vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 13 additions & 10 deletions
@@ -636,7 +636,8 @@ def forward_cuda(
             expert_map=expert_map,
             expert_load_view=expert_load_view,
             logical_to_physical_map=logical_to_physical_map,
-            logical_replica_count=logical_replica_count)
+            logical_replica_count=logical_replica_count,
+            fused_experts_method=self.fused_experts)
 
         if self.rocm_aiter_moe_enabled:
             assert self.fused_experts is None
@@ -2056,19 +2057,20 @@ def select_experts(
         # 2. Record expert load metrics.
 
         # When using FusedMoEModularKernel,
-        # expert load statistics are handled directly in the kernel using
+        # expert load statistics are handled directly in the kernel using
         # ExpertTokensMetadata.expert_num_tokens for better performance.
-        # For other implementations or when metadata is not available,
+        # For other implementations or when metadata is not available,
         # we fall back to here.
 
-        # There is no expert_num_tokens in
+        # There is no expert_num_tokens in
         # expert_tokens_meta of DeepEPHTPrepareAndFinalize
-        # so it is not supported DeepEPHTPrepareAndFinalize for now.
+        # so it is not supported DeepEPHTPrepareAndFinalize for now.
         # TODO: Maybe it is better to support DeepEPHTPrepareAndFinalize.
-        skip_expert_load_scatter_add = ((fused_experts_method is not None) and
-                                        isinstance(fused_experts_method, FusedMoEModularKernel) and
-                                        (fused_experts_method.prepare_finalize.__class__ !=
-                                         "DeepEPHTPrepareAndFinalize"))
+        skip_expert_load_scatter_add = (
+            (fused_experts_method is not None)
+            and isinstance(fused_experts_method, FusedMoEModularKernel)
+            and (fused_experts_method.prepare_finalize.__class__
+                 != "DeepEPHTPrepareAndFinalize"))
 
         if not skip_expert_load_scatter_add:
             logger.debug("expert_load_view update from topk_ids.")
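
The condition reformatted above is the crux of this hunk: the scatter_add_ fallback in select_experts is skipped only when a FusedMoEModularKernel will record expert load itself, and the DeepEP high-throughput path is excluded because its metadata lacks expert_num_tokens. A minimal standalone sketch of that decision, using stand-in classes rather than vLLM's real types, and comparing the prepare/finalize type by name, which appears to be the intent of the __class__ comparison against a string:

# Minimal sketch of the skip decision; the classes below are
# stand-ins for vLLM's real types, not the actual implementations.
class FusedMoEModularKernel:
    def __init__(self, prepare_finalize):
        self.prepare_finalize = prepare_finalize

class DeepEPHTPrepareAndFinalize:
    # Its expert_tokens_meta has no expert_num_tokens, so the kernel
    # cannot record expert load itself.
    pass

class OtherPrepareAndFinalize:  # hypothetical path with metadata
    pass

def should_skip_scatter_add(fused_experts_method) -> bool:
    # Skip the fallback only when a modular kernel records expert load
    # from ExpertTokensMetadata.expert_num_tokens, i.e. any
    # prepare/finalize except the DeepEP high-throughput one.
    return (fused_experts_method is not None
            and isinstance(fused_experts_method, FusedMoEModularKernel)
            and type(fused_experts_method.prepare_finalize).__name__
            != "DeepEPHTPrepareAndFinalize")

assert not should_skip_scatter_add(None)
assert not should_skip_scatter_add(
    FusedMoEModularKernel(DeepEPHTPrepareAndFinalize()))
assert should_skip_scatter_add(
    FusedMoEModularKernel(OtherPrepareAndFinalize()))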
@@ -2080,7 +2082,7 @@ def select_experts(
             # Replace invalid expert ids with 0 (just a dummy position)
             # to avoid out-of-bounds errors in scatter_add_
             index = topk_ids_flatten.masked_fill_(invalid_mask, 0)
-            # `src` is the valid mask,
+            # `src` is the valid mask,
             # which is 1 for valid and 0 for invalid
             src = ~invalid_mask
 
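The surrounding context shows the fallback itself: per-expert token counts accumulated with scatter_add_, after clamping invalid expert ids to a dummy index with zero weight. A self-contained sketch under the assumption that negative ids mark invalid entries; shapes and values are illustrative:

import torch

# Sketch of the fallback load accounting: count tokens routed to each
# expert while ignoring invalid (negative) expert ids. The shapes and
# the "-1 means invalid" convention are assumptions for illustration.
num_experts = 4
topk_ids = torch.tensor([[0, 2], [3, -1], [2, 2]])  # (num_tokens, top_k)

topk_ids_flatten = topk_ids.flatten()
invalid_mask = topk_ids_flatten < 0

# Replace invalid ids with 0 (just a dummy position) so scatter_add_
# stays in bounds; src is 0 there, so expert 0 is not over-counted.
index = topk_ids_flatten.masked_fill(invalid_mask, 0)
# `src` is the valid mask: 1 for valid and 0 for invalid.
src = (~invalid_mask).long()

expert_load_view = torch.zeros(num_experts, dtype=torch.long)
expert_load_view.scatter_add_(dim=0, index=index, src=src)
print(expert_load_view)  # tensor([1, 0, 3, 1])
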
@@ -2510,6 +2512,7 @@ def clear_expert_load_view(self):
         if self.expert_load_view is not None:
             self.expert_load_view.zero_()
 
+
 def moe_forward(
     hidden_states: torch.Tensor,
     router_logits: torch.Tensor,

vllm/model_executor/layers/fused_moe/modular_kernel.py

Lines changed: 23 additions & 23 deletions
@@ -1243,29 +1243,29 @@ def forward(
          _expert_topk_weights) = receiver()
 
         # In EPLB, update expert load from expert_num_tokens.
-        if (expert_tokens_meta is not None and expert_load_view is not None and
-                expert_tokens_meta.expert_num_tokens is not None and
-                expert_map is not None):
-            # Initialize the mapping of the local physical experts
-            # to global physical experts, after which it will not change.
-            # `expert_load_view`: (num_physical_experts,)
-            # `expert_num_tokens`: (local_num_physical_experts,)
-            if self.expert_map is None:
-                self.expert_map = expert_map.clone()
-                self.local_to_global_physical_experts = \
-                    torch.nonzero(expert_map != -1,
-                                  as_tuple=False).squeeze()
-            else:
-                if not torch.equal(self.expert_map, expert_map):
-                    self.expert_map = expert_map.clone()
-                    self.local_to_global_physical_experts = \
-                        torch.nonzero(expert_map != -1,
-                                      as_tuple=False).squeeze()
-
-            # Use pre-computed expert token counts from metadata
-            expert_load_view.scatter_add_(dim=0,
-                index=self.local_to_global_physical_experts,
-                src=expert_tokens_meta.expert_num_tokens)
+        if (expert_tokens_meta is not None and expert_load_view is not None
+                and expert_tokens_meta.expert_num_tokens is not None
+                and expert_map is not None):
+            # Initialize the mapping of the local physical experts
+            # to global physical experts, after which it will not change.
+            # `expert_load_view`: (num_physical_experts,)
+            # `expert_num_tokens`: (local_num_physical_experts,)
+            if self.expert_map is None:
+                self.expert_map = expert_map.clone()
+                self.local_to_global_physical_experts = \
+                    torch.nonzero(expert_map != -1,
+                                  as_tuple=False).squeeze()
+            else:
+                if not torch.equal(self.expert_map, expert_map):
+                    self.expert_map = expert_map.clone()
+                    self.local_to_global_physical_experts = \
+                        torch.nonzero(expert_map != -1,
+                                      as_tuple=False).squeeze()
+            # Use pre-computed expert token counts from metadata
+            expert_load_view.scatter_add_(
+                dim=0,
+                index=self.local_to_global_physical_experts,
+                src=expert_tokens_meta.expert_num_tokens)
 
         # Maybe prepare gathered topk_ids and topk_weights from other EP ranks.
         topk_ids = topk_ids if _expert_topk_ids is None else _expert_topk_ids
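
For the fast path reflowed above, a hedged standalone sketch of the mapping: expert_map holds each global physical expert's local id on this rank (or -1 if it lives elsewhere), torch.nonzero recovers the global indices of the rank-local experts, and the kernel's pre-computed per-local-expert token counts are scattered into the global load view. Sizes and the example mapping are made up for illustration:

import torch

# Sketch of the EPLB fast path: fold per-rank (local) expert token
# counts into the global expert_load_view. Sizes and the mapping here
# are illustrative assumptions.
num_physical_experts = 8
# Local id of each global physical expert; -1 = not on this rank.
expert_map = torch.tensor([-1, -1, 0, 1, 2, -1, -1, -1])

# Global indices of this rank's local experts. In the kernel this is
# computed once and cached until expert_map changes.
local_to_global_physical_experts = torch.nonzero(
    expert_map != -1, as_tuple=False).squeeze()

# Pre-computed token counts per local expert, standing in for
# ExpertTokensMetadata.expert_num_tokens.
expert_num_tokens = torch.tensor([5, 0, 7])

expert_load_view = torch.zeros(num_physical_experts, dtype=torch.long)
expert_load_view.scatter_add_(dim=0,
                              index=local_to_global_physical_experts,
                              src=expert_num_tokens)
print(expert_load_view)  # tensor([0, 0, 5, 0, 7, 0, 0, 0])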

vllm/model_executor/layers/quantization/fp8.py

Lines changed: 1 addition & 2 deletions
@@ -1231,8 +1231,7 @@ def apply(
             expert_load_view=expert_load_view,
             logical_to_physical_map=logical_to_physical_map,
             logical_replica_count=logical_replica_count,
-            fused_experts_method=self.fused_experts
-        )
+            fused_experts_method=self.fused_experts)
 
         if self.rocm_aiter_moe_enabled:
             from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (  # noqa: E501
