File tree Expand file tree Collapse file tree 1 file changed +23
-0
lines changed Expand file tree Collapse file tree 1 file changed +23
-0
lines changed Original file line number Diff line number Diff line change @@ -1048,6 +1048,29 @@ class AllreduceOp
10481048            return  AllReduceStrategyType::NCCL;
10491049        }
10501050
1051+         //  This rule-based heuristic only chooses between the NCCL and MIN_LATENCY strategies.
1052+ 
1053+         //  The heuristic is only applied to the NONE and RESIDUAL_RMS_NORM fusion types,
1054+         //  because NCCL might be faster in some large messageSize cases.
1055+         //  Otherwise, the MIN_LATENCY strategy is returned directly because it supports more fusions.
1056+         //  TODO: NCCL AllReduce + subsequent quantization ops (as fallback) can also support the fusion types.
1057+         //  This should be compared with MIN_LATENCY fused kernels to determine the best strategy.
1058+         switch  (mOp )
1059+         {
1060+         case  AllReduceFusionOp::NONE:
1061+         case  AllReduceFusionOp::RESIDUAL_RMS_NORM: break ;
1062+         case  AllReduceFusionOp::RESIDUAL_RMS_NORM_QUANT_FP8:
1063+         case  AllReduceFusionOp::RESIDUAL_RMS_NORM_OUT_QUANT_FP8:
1064+         case  AllReduceFusionOp::RESIDUAL_RMS_NORM_QUANT_NVFP4:
1065+         case  AllReduceFusionOp::RESIDUAL_RMS_NORM_OUT_QUANT_NVFP4: return  AllReduceStrategyType::MIN_LATENCY;
1066+         //  Assume NCCL has fallback implementations for all fusion types.
1067+         default : return  AllReduceStrategyType::NCCL;
1068+         }
1069+ 
1070+         //  Check that mOp is supported by the heuristic.
1071+         TORCH_CHECK (mOp  == AllReduceFusionOp::NONE || mOp  == AllReduceFusionOp::RESIDUAL_RMS_NORM,
1072+             " Only NONE and RESIDUAL_RMS_NORM are supported for NCCL/MIN_LATENCY heuristic." 
1073+ 
10511074        //  Default to NCCL.
10521075        AllReduceStrategyType strategy = AllReduceStrategyType::NCCL;
10531076
 
 
   
 
     
   
   
          
    
    
     
    
      
     
     
    You can’t perform that action at this time.
  
 
    
  
    
      
        
     
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments