Skip to content

Commit 6535fa5

Browse files
committed
Reverse allreduceOp.cpp changes.
Signed-off-by: Yukun He <[email protected]>
1 parent a4fb14f commit 6535fa5

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

cpp/tensorrt_llm/thop/allreduceOp.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,6 +1048,29 @@ class AllreduceOp
10481048
return AllReduceStrategyType::NCCL;
10491049
}
10501050

1051+
// This rule based heuristic only chooses between NCCL and MIN_LATENCY strategies.
1052+
1053+
// Heuristic will only be applied on NONE and RESIDUAL_RMS_NORM fusion types.
1054+
// Because NCCL might be faster on some large messageSize cases.
1055+
// Otherwise, the MIN_LATENCY strategy is returned directly because it supports more fusion types.
1056+
// TODO: NCCL AllReduce + subsequent quantization ops (as fallback) can also support the fusion types.
1057+
// This should be compared with MIN_LATENCY fused kernels to determine the best strategy.
1058+
switch (mOp)
1059+
{
1060+
case AllReduceFusionOp::NONE:
1061+
case AllReduceFusionOp::RESIDUAL_RMS_NORM: break;
1062+
case AllReduceFusionOp::RESIDUAL_RMS_NORM_QUANT_FP8:
1063+
case AllReduceFusionOp::RESIDUAL_RMS_NORM_OUT_QUANT_FP8:
1064+
case AllReduceFusionOp::RESIDUAL_RMS_NORM_QUANT_NVFP4:
1065+
case AllReduceFusionOp::RESIDUAL_RMS_NORM_OUT_QUANT_NVFP4: return AllReduceStrategyType::MIN_LATENCY;
1066+
// Assume NCCL has fallback implementations for all fusion types.
1067+
default: return AllReduceStrategyType::NCCL;
1068+
}
1069+
1070+
// Check mOp to be supported by the heuristic.
1071+
TORCH_CHECK(mOp == AllReduceFusionOp::NONE || mOp == AllReduceFusionOp::RESIDUAL_RMS_NORM,
1072+
"Only NONE and RESIDUAL_RMS_NORM are supported for NCCL/MIN_LATENCY heuristic.");
1073+
10511074
// Default to NCCL.
10521075
AllReduceStrategyType strategy = AllReduceStrategyType::NCCL;
10531076

0 commit comments

Comments
 (0)