@@ -27,8 +27,9 @@ use rustc_session::config::{
 };
 use rustc_span::{BytePos, InnerSpan, Pos, SpanData, SyntaxContext, sym};
 use rustc_target::spec::{CodeModel, RelocModel, SanitizerSet, SplitDebuginfo, TlsModel};
-use tracing::debug;
+use tracing::{debug, trace};
 
+//use crate::back::autodiff::*;
 use crate::back::lto::ThinBuffer;
 use crate::back::owned_target_machine::OwnedTargetMachine;
 use crate::back::profiling::{
@@ -529,9 +530,35 @@ pub(crate) unsafe fn llvm_optimize(
     config: &ModuleConfig,
     opt_level: config::OptLevel,
     opt_stage: llvm::OptStage,
+    skip_size_increasing_opts: bool,
 ) -> Result<(), FatalError> {
-    let unroll_loops =
-        opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+    // Enzyme:
+    // The whole point of compiler based AD is to differentiate optimized IR instead of unoptimized
+    // source code. However, benchmarks show that optimizations increasing the code size
+    // tend to reduce AD performance. Therefore deactivate them before AD, then differentiate the code
+    // and finally re-optimize the module, now with all optimizations available.
+    // FIXME(ZuseZ4): In a future update we could figure out how to only optimize individual functions getting
+    // differentiated.
+
+    let unroll_loops;
+    let vectorize_slp;
+    let vectorize_loop;
+
+    // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing
+    // optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly,
+    // we should make this more granular, or at least check that the user has at least one autodiff
+    // call in their code, to justify altering the compilation pipeline.
+    if skip_size_increasing_opts && cfg!(llvm_enzyme) {
+        unroll_loops = false;
+        vectorize_slp = false;
+        vectorize_loop = false;
+    } else {
+        unroll_loops =
+            opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+        vectorize_slp = config.vectorize_slp;
+        vectorize_loop = config.vectorize_loop;
+    }
+    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
     let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
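For readers following the gating logic above, here is a minimal, self-contained sketch of the flag selection. It is not compiler code: `llvm_enzyme_enabled` stands in for `cfg!(llvm_enzyme)`, and `config_vectorize_slp` / `config_vectorize_loop` stand in for the `ModuleConfig` fields, so the snippet compiles on its own.

// Sketch only: mirrors the flag selection added in llvm_optimize above,
// with cfg!(llvm_enzyme) and ModuleConfig replaced by plain booleans.
fn effective_flags(
    skip_size_increasing_opts: bool,
    llvm_enzyme_enabled: bool,
    size_opt_level: bool, // true for OptLevel::Size or OptLevel::SizeMin
    config_vectorize_slp: bool,
    config_vectorize_loop: bool,
) -> (bool, bool, bool) {
    if skip_size_increasing_opts && llvm_enzyme_enabled {
        // Postpone all size-increasing transforms until after differentiation.
        (false, false, false)
    } else {
        // Same defaults as before this change.
        (!size_opt_level, config_vectorize_slp, config_vectorize_loop)
    }
}

fn main() {
    // With Enzyme built in and AD still pending, everything is held back.
    assert_eq!(effective_flags(true, true, false, true, true), (false, false, false));
    // Otherwise the previous behaviour is preserved.
    assert_eq!(effective_flags(false, true, false, true, true), (true, true, true));
}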
@@ -595,8 +622,8 @@ pub(crate) unsafe fn llvm_optimize(
         using_thin_buffers,
         config.merge_functions,
         unroll_loops,
-        config.vectorize_slp,
-        config.vectorize_loop,
+        vectorize_slp,
+        vectorize_loop,
         config.no_builtins,
         config.emit_lifetime_markers,
         sanitizer_options.as_ref(),
@@ -640,14 +667,29 @@ pub(crate) unsafe fn optimize(
         unsafe { llvm::LLVMWriteBitcodeToFile(llmod, out.as_ptr()) };
     }
 
+    // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
+
     if let Some(opt_level) = config.opt_level {
         let opt_stage = match cgcx.lto {
            Lto::Fat => llvm::OptStage::PreLinkFatLTO,
            Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO,
            _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
            _ => llvm::OptStage::PreLinkNoLTO,
         };
-        return unsafe { llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) };
+
+        // If we know that we will later run AD, then we disable vectorization and loop unrolling
+        let skip_size_increasing_opts = cfg!(llvm_enzyme);
+        return unsafe {
+            llvm_optimize(
+                cgcx,
+                dcx,
+                module,
+                config,
+                opt_level,
+                opt_stage,
+                skip_size_increasing_opts,
+            )
+        };
     }
     Ok(())
 }
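The comments in `llvm_optimize` describe a three-step pipeline: optimize with size-increasing passes held back, differentiate, then re-optimize with everything enabled. As a rough, hypothetical illustration of that flow; the `Module`, `optimize`, and `differentiate` stand-ins below are placeholders, not rustc_codegen_llvm APIs.

// Hypothetical, self-contained sketch of the intended pipeline order.
struct Module {
    ir: String,
}

fn optimize(module: &mut Module, skip_size_increasing_opts: bool) {
    // Placeholder for a pass-manager run; here we only record what happened.
    module.ir.push_str(if skip_size_increasing_opts {
        "\n; optimized, size-increasing passes held back"
    } else {
        "\n; optimized with all passes"
    });
}

fn differentiate(module: &mut Module) {
    // Placeholder for the Enzyme autodiff pass.
    module.ir.push_str("\n; differentiated");
}

fn main() {
    let mut module = Module { ir: String::from("; original IR") };
    let has_autodiff = true; // e.g. the crate contains autodiff invocations

    // 1. First round: hold back size-increasing optimizations before AD.
    optimize(&mut module, has_autodiff);
    if has_autodiff {
        // 2. Differentiate the moderately optimized IR.
        differentiate(&mut module);
        // 3. Re-optimize with the full set of optimizations.
        optimize(&mut module, false);
    }
    println!("{}", module.ir);
}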