@@ -223,6 +223,8 @@ extern cl::opt<bool> EnableMatrix;
223223
224224extern cl::opt<bool > DisablePreInliner;
225225extern cl::opt<int > PreInlineThreshold;
226+
227+ extern cl::opt<bool > SYCLOptimizationMode;
226228} // namespace llvm
227229
228230void PassBuilder::invokePeepholeEPCallbacks (FunctionPassManager &FPM,
@@ -271,78 +273,88 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
271273 // Form canonically associated expression trees, and simplify the trees using
272274 // basic mathematical properties. For example, this will form (nearly)
273275 // minimal multiplication trees.
274- FPM.addPass (ReassociatePass ());
275-
276- // Add the primary loop simplification pipeline.
277- // FIXME: Currently this is split into two loop pass pipelines because we run
278- // some function passes in between them. These can and should be removed
279- // and/or replaced by scheduling the loop pass equivalents in the correct
280- // positions. But those equivalent passes aren't powerful enough yet.
281- // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
282- // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
283- // fully replace `SimplifyCFGPass`, and the closest to the other we have is
284- // `LoopInstSimplify`.
285- LoopPassManager LPM1, LPM2;
286-
287- // Simplify the loop body. We do this initially to clean up after other loop
288- // passes run, either when iterating on a loop or on inner loops with
289- // implications on the outer loop.
290- LPM1.addPass (LoopInstSimplifyPass ());
291- LPM1.addPass (LoopSimplifyCFGPass ());
292-
293- // Try to remove as much code from the loop header as possible,
294- // to reduce amount of IR that will have to be duplicated.
295- // TODO: Investigate promotion cap for O1.
296- LPM1.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
297-
298- LPM1.addPass (LoopRotatePass (/* Disable header duplication */ true ,
299- isLTOPreLink (Phase)));
300- // TODO: Investigate promotion cap for O1.
301- LPM1.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
302- LPM1.addPass (SimpleLoopUnswitchPass ());
303- if (EnableLoopFlatten)
304- LPM1.addPass (LoopFlattenPass ());
305-
306- LPM2.addPass (LoopIdiomRecognizePass ());
307- LPM2.addPass (IndVarSimplifyPass ());
308-
309- for (auto &C : LateLoopOptimizationsEPCallbacks)
310- C (LPM2, Level);
311-
312- LPM2.addPass (LoopDeletionPass ());
313-
314- if (EnableLoopInterchange)
315- LPM2.addPass (LoopInterchangePass ());
316-
317- // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
318- // because it changes IR to makes profile annotation in back compile
319- // inaccurate. The normal unroller doesn't pay attention to forced full unroll
320- // attributes so we need to make sure and allow the full unroll pass to pay
321- // attention to it.
322- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
323- PGOOpt->Action != PGOOptions::SampleUse)
324- LPM2.addPass (LoopFullUnrollPass (Level.getSpeedupLevel (),
325- /* OnlyWhenForced= */ !PTO.LoopUnrolling ,
326- PTO.ForgetAllSCEVInLoopUnroll ));
327-
328- for (auto &C : LoopOptimizerEndEPCallbacks)
329- C (LPM2, Level);
330-
331- // We provide the opt remark emitter pass for LICM to use. We only need to do
332- // this once as it is immutable.
333- FPM.addPass (
334- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
335- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM1),
336- /* UseMemorySSA=*/ true ,
337- /* UseBlockFrequencyInfo=*/ true ));
338- FPM.addPass (SimplifyCFGPass ());
339- FPM.addPass (InstCombinePass ());
340- // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
341- // *All* loop passes must preserve it, in order to be able to use it.
342- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM2),
343- /* UseMemorySSA=*/ false ,
344- /* UseBlockFrequencyInfo=*/ false ));
276+ if (!SYCLOptimizationMode) {
277+ // FIXME: re-association increases variable liveness and therefore register
278+ // pressure.
279+ FPM.addPass (ReassociatePass ());
280+
281+ // Do not run loop pass pipeline in "SYCL Optimization Mode". Loop
282+ // optimizations rely on TTI, which is not accurate for SPIR target.
283+
284+ // Add the primary loop simplification pipeline.
285+ // FIXME: Currently this is split into two loop pass pipelines because we
286+ // run some function passes in between them. These can and should be removed
287+ // and/or replaced by scheduling the loop pass equivalents in the correct
288+ // positions. But those equivalent passes aren't powerful enough yet.
289+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
290+ // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough
291+ // to fully replace `SimplifyCFGPass`, and the closest to the other we have
292+ // is `LoopInstSimplify`.
293+ LoopPassManager LPM1, LPM2;
294+
295+ // Simplify the loop body. We do this initially to clean up after other loop
296+ // passes run, either when iterating on a loop or on inner loops with
297+ // implications on the outer loop.
298+ LPM1.addPass (LoopInstSimplifyPass ());
299+ LPM1.addPass (LoopSimplifyCFGPass ());
300+
301+ // Try to remove as much code from the loop header as possible,
302+ // to reduce amount of IR that will have to be duplicated.
303+ // TODO: Investigate promotion cap for O1.
304+ LPM1.addPass (
305+ LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
306+
307+ LPM1.addPass (LoopRotatePass (/* Disable header duplication */ true ,
308+ isLTOPreLink (Phase)));
309+ // TODO: Investigate promotion cap for O1.
310+ LPM1.addPass (
311+ LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
312+ LPM1.addPass (SimpleLoopUnswitchPass ());
313+ if (EnableLoopFlatten)
314+ LPM1.addPass (LoopFlattenPass ());
315+
316+ LPM2.addPass (LoopIdiomRecognizePass ());
317+ LPM2.addPass (IndVarSimplifyPass ());
318+
319+ for (auto &C : LateLoopOptimizationsEPCallbacks)
320+ C (LPM2, Level);
345321
322+ LPM2.addPass (LoopDeletionPass ());
323+
324+ if (EnableLoopInterchange)
325+ LPM2.addPass (LoopInterchangePass ());
326+
327+ // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
328+ // because it changes IR, making profile annotation in the back compile
329+ // inaccurate. The normal unroller doesn't pay attention to forced full
330+ // unroll attributes so we need to make sure and allow the full unroll pass
331+ // to pay attention to it.
332+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
333+ PGOOpt->Action != PGOOptions::SampleUse)
334+ LPM2.addPass (LoopFullUnrollPass (Level.getSpeedupLevel (),
335+ /* OnlyWhenForced= */ !PTO.LoopUnrolling ,
336+ PTO.ForgetAllSCEVInLoopUnroll ));
337+
338+ for (auto &C : LoopOptimizerEndEPCallbacks)
339+ C (LPM2, Level);
340+
341+ // We provide the opt remark emitter pass for LICM to use. We only need to
342+ // do this once as it is immutable.
343+ FPM.addPass (
344+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
345+ FPM.addPass (
346+ createFunctionToLoopPassAdaptor (std::move (LPM1),
347+ /* UseMemorySSA=*/ true ,
348+ /* UseBlockFrequencyInfo=*/ true ));
349+ FPM.addPass (SimplifyCFGPass ());
350+ FPM.addPass (InstCombinePass ());
351+ // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
352+ // *All* loop passes must preserve it, in order to be able to use it.
353+ FPM.addPass (
354+ createFunctionToLoopPassAdaptor (std::move (LPM2),
355+ /* UseMemorySSA=*/ false ,
356+ /* UseBlockFrequencyInfo=*/ false ));
357+ }
346358 // Delete small array after loop unroll.
347359 FPM.addPass (SROAPass ());
348360
@@ -443,81 +455,92 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
443455 // Form canonically associated expression trees, and simplify the trees using
444456 // basic mathematical properties. For example, this will form (nearly)
445457 // minimal multiplication trees.
446- FPM.addPass (ReassociatePass ());
447-
448- // Add the primary loop simplification pipeline.
449- // FIXME: Currently this is split into two loop pass pipelines because we run
450- // some function passes in between them. These can and should be removed
451- // and/or replaced by scheduling the loop pass equivalents in the correct
452- // positions. But those equivalent passes aren't powerful enough yet.
453- // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
454- // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
455- // fully replace `SimplifyCFGPass`, and the closest to the other we have is
456- // `LoopInstSimplify`.
457- LoopPassManager LPM1, LPM2;
458-
459- // Simplify the loop body. We do this initially to clean up after other loop
460- // passes run, either when iterating on a loop or on inner loops with
461- // implications on the outer loop.
462- LPM1.addPass (LoopInstSimplifyPass ());
463- LPM1.addPass (LoopSimplifyCFGPass ());
464-
465- // Try to remove as much code from the loop header as possible,
466- // to reduce amount of IR that will have to be duplicated.
467- // TODO: Investigate promotion cap for O1.
468- LPM1.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
458+ if (!SYCLOptimizationMode) {
459+ // FIXME: re-association increases variable liveness and therefore register
460+ // pressure.
461+ FPM.addPass (ReassociatePass ());
462+
463+ // Do not run loop pass pipeline in "SYCL Optimization Mode". Loop
464+ // optimizations rely on TTI, which is not accurate for SPIR target.
465+
466+ // Add the primary loop simplification pipeline.
467+ // FIXME: Currently this is split into two loop pass pipelines because we
468+ // run some function passes in between them. These can and should be removed
469+ // and/or replaced by scheduling the loop pass equivalents in the correct
470+ // positions. But those equivalent passes aren't powerful enough yet.
471+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
472+ // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough
473+ // to fully replace `SimplifyCFGPass`, and the closest to the other we have
474+ // is `LoopInstSimplify`.
475+ LoopPassManager LPM1, LPM2;
476+
477+ // Simplify the loop body. We do this initially to clean up after other loop
478+ // passes run, either when iterating on a loop or on inner loops with
479+ // implications on the outer loop.
480+ LPM1.addPass (LoopInstSimplifyPass ());
481+ LPM1.addPass (LoopSimplifyCFGPass ());
482+
483+ // Try to remove as much code from the loop header as possible,
484+ // to reduce amount of IR that will have to be duplicated.
485+ // TODO: Investigate promotion cap for O1.
486+ LPM1.addPass (
487+ LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
488+
489+ // Disable header duplication in loop rotation at -Oz.
490+ LPM1.addPass (
491+ LoopRotatePass (Level != OptimizationLevel::Oz, isLTOPreLink (Phase)));
492+ // TODO: Investigate promotion cap for O1.
493+ LPM1.addPass (
494+ LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
495+ LPM1.addPass (SimpleLoopUnswitchPass (/* NonTrivial */ Level ==
496+ OptimizationLevel::O3 &&
497+ EnableO3NonTrivialUnswitching));
498+ if (EnableLoopFlatten)
499+ LPM1.addPass (LoopFlattenPass ());
500+
501+ LPM2.addPass (LoopIdiomRecognizePass ());
502+ LPM2.addPass (IndVarSimplifyPass ());
469503
470- // Disable header duplication in loop rotation at -Oz.
471- LPM1.addPass (
472- LoopRotatePass (Level != OptimizationLevel::Oz, isLTOPreLink (Phase)));
473- // TODO: Investigate promotion cap for O1.
474- LPM1.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
475- LPM1.addPass (
476- SimpleLoopUnswitchPass (/* NonTrivial */ Level == OptimizationLevel::O3 &&
477- EnableO3NonTrivialUnswitching));
478- if (EnableLoopFlatten)
479- LPM1.addPass (LoopFlattenPass ());
480-
481- LPM2.addPass (LoopIdiomRecognizePass ());
482- LPM2.addPass (IndVarSimplifyPass ());
483-
484- for (auto &C : LateLoopOptimizationsEPCallbacks)
485- C (LPM2, Level);
486-
487- LPM2.addPass (LoopDeletionPass ());
488-
489- if (EnableLoopInterchange)
490- LPM2.addPass (LoopInterchangePass ());
491-
492- // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
493- // because it changes IR to makes profile annotation in back compile
494- // inaccurate. The normal unroller doesn't pay attention to forced full unroll
495- // attributes so we need to make sure and allow the full unroll pass to pay
496- // attention to it.
497- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
498- PGOOpt->Action != PGOOptions::SampleUse)
499- LPM2.addPass (LoopFullUnrollPass (Level.getSpeedupLevel (),
500- /* OnlyWhenForced= */ !PTO.LoopUnrolling ,
501- PTO.ForgetAllSCEVInLoopUnroll ));
502-
503- for (auto &C : LoopOptimizerEndEPCallbacks)
504- C (LPM2, Level);
505-
506- // We provide the opt remark emitter pass for LICM to use. We only need to do
507- // this once as it is immutable.
508- FPM.addPass (
509- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
510- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM1),
511- /* UseMemorySSA=*/ true ,
512- /* UseBlockFrequencyInfo=*/ true ));
513- FPM.addPass (SimplifyCFGPass ());
514- FPM.addPass (InstCombinePass ());
515- // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
516- // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
517- // *All* loop passes must preserve it, in order to be able to use it.
518- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM2),
519- /* UseMemorySSA=*/ false ,
520- /* UseBlockFrequencyInfo=*/ false ));
504+ for (auto &C : LateLoopOptimizationsEPCallbacks)
505+ C (LPM2, Level);
506+
507+ LPM2.addPass (LoopDeletionPass ());
508+
509+ if (EnableLoopInterchange)
510+ LPM2.addPass (LoopInterchangePass ());
511+
512+ // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
513+ // because it changes IR, making profile annotation in the back compile
514+ // inaccurate. The normal unroller doesn't pay attention to forced full
515+ // unroll attributes so we need to make sure and allow the full unroll pass
516+ // to pay attention to it.
517+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
518+ PGOOpt->Action != PGOOptions::SampleUse)
519+ LPM2.addPass (LoopFullUnrollPass (Level.getSpeedupLevel (),
520+ /* OnlyWhenForced= */ !PTO.LoopUnrolling ,
521+ PTO.ForgetAllSCEVInLoopUnroll ));
522+
523+ for (auto &C : LoopOptimizerEndEPCallbacks)
524+ C (LPM2, Level);
525+
526+ // We provide the opt remark emitter pass for LICM to use. We only need to
527+ // do this once as it is immutable.
528+ FPM.addPass (
529+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
530+ FPM.addPass (
531+ createFunctionToLoopPassAdaptor (std::move (LPM1),
532+ /* UseMemorySSA=*/ true ,
533+ /* UseBlockFrequencyInfo=*/ true ));
534+ FPM.addPass (SimplifyCFGPass ());
535+ FPM.addPass (InstCombinePass ());
536+ // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
537+ // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
538+ // *All* loop passes must preserve it, in order to be able to use it.
539+ FPM.addPass (
540+ createFunctionToLoopPassAdaptor (std::move (LPM2),
541+ /* UseMemorySSA=*/ false ,
542+ /* UseBlockFrequencyInfo=*/ false ));
543+ }
521544
522545 // Delete small array after loop unroll.
523546 FPM.addPass (SROAPass ());
@@ -1162,29 +1185,32 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
11621185 for (auto &C : VectorizerStartEPCallbacks)
11631186 C (OptimizePM, Level);
11641187
1165- LoopPassManager LPM;
1166- // First rotate loops that may have been un-rotated by prior passes.
1167- // Disable header duplication at -Oz.
1168- LPM.addPass (LoopRotatePass (Level != OptimizationLevel::Oz, LTOPreLink));
1169- // Some loops may have become dead by now. Try to delete them.
1170- // FIXME: see discussion in https://reviews.llvm.org/D112851,
1171- // this may need to be revisited once we run GVN before loop deletion
1172- // in the simplification pipeline.
1173- LPM.addPass (LoopDeletionPass ());
1174- OptimizePM.addPass (createFunctionToLoopPassAdaptor (
1175- std::move (LPM), /* UseMemorySSA=*/ false , /* UseBlockFrequencyInfo=*/ false ));
1176-
1177- // Distribute loops to allow partial vectorization. I.e. isolate dependences
1178- // into separate loop that would otherwise inhibit vectorization. This is
1179- // currently only performed for loops marked with the metadata
1180- // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1181- OptimizePM.addPass (LoopDistributePass ());
1182-
1183- // Populates the VFABI attribute with the scalar-to-vector mappings
1184- // from the TargetLibraryInfo.
1185- OptimizePM.addPass (InjectTLIMappings ());
1186-
1187- addVectorPasses (Level, OptimizePM, /* IsFullLTO */ false );
1188+ if (!SYCLOptimizationMode) {
1189+ LoopPassManager LPM;
1190+ // First rotate loops that may have been un-rotated by prior passes.
1191+ // Disable header duplication at -Oz.
1192+ LPM.addPass (LoopRotatePass (Level != OptimizationLevel::Oz, LTOPreLink));
1193+ // Some loops may have become dead by now. Try to delete them.
1194+ // FIXME: see discussion in https://reviews.llvm.org/D112851,
1195+ // this may need to be revisited once we run GVN before loop deletion
1196+ // in the simplification pipeline.
1197+ LPM.addPass (LoopDeletionPass ());
1198+ OptimizePM.addPass (
1199+ createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA=*/ false ,
1200+ /* UseBlockFrequencyInfo=*/ false ));
1201+
1202+ // Distribute loops to allow partial vectorization. I.e. isolate dependences
1203+ // into separate loop that would otherwise inhibit vectorization. This is
1204+ // currently only performed for loops marked with the metadata
1205+ // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1206+ OptimizePM.addPass (LoopDistributePass ());
1207+
1208+ // Populates the VFABI attribute with the scalar-to-vector mappings
1209+ // from the TargetLibraryInfo.
1210+ OptimizePM.addPass (InjectTLIMappings ());
1211+
1212+ addVectorPasses (Level, OptimizePM, /* IsFullLTO */ false );
1213+ }
11881214
11891215 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
11901216 // canonicalization pass that enables other optimizations. As a result,
0 commit comments