Commit 8c46fcb

Auto merge of rust-lang#131650 - saethlin:post-mono-mir-opts, r=<try>
Add post-mono MIR optimizations

Before this PR, all MIR passes had to operate on polymorphic MIR. Thus any MIR transform may be unable to determine the type of an argument or local (because it is still generic), or it may be unable to determine which function a Call terminator is calling (because it is still generic). MIR transforms are a highly maintainable solution to a number of compiler problems, but this polymorphic limitation means that they cannot solve some of the problems we'd like them to; the most recent examples that come to mind are rust-lang#134082, which has extra limitations because of the polymorphic inliner, and rust-lang#139088, which is explicitly waiting for post-mono MIR passes to happen.

The lack of post-mono MIR optimizations also means that MIR optimizations simply miss out on profitable opportunities, which are so valuable that we've added kludges like rust-lang#121421 (a MIR traversal that you had better only run at mono time). On top of that, rustc_codegen_ssa is riddled with on-the-fly monomorphization and optimization; in my experience the logic for these codegen-time tricks is hard to maintain, and I would much rather have it implemented as MIR transforms.

So this PR adds a new query, `codegen_mir` (the MIR for codegen; not that I like the name). I've then replaced _some_ of the kludges in rustc_codegen_ssa with `PostMono` variants of existing MIR transforms. I've also un-querified `check_mono_item` and put it at the end of the post-mono pass list. Those checks should be post-mono passes too, but I've tried to keep this PR to a reviewable size. It's easy to imagine lots of other places to use post-mono MIR opts, and I want the usefulness of this to be clear while the diff remains manageable.

---

This PR has a perf regression. I've hammered on the perf in a number of ways to get it down to what it is. incr-full builds suffer the most because they need to clone, intern, and cache a monomorphized copy of every MIR body. Results are mixed for every other build scenario. In almost all cases, binary sizes improve.
2 parents 0c33fe2 + 8ac6827 commit 8c46fcb
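To make the motivation concrete, here is a minimal, standalone sketch (ordinary Rust, not rustc-internal code; the trait and function names are invented for illustration) of the kind of information that only becomes available after monomorphization: in the generic function, neither the branch condition nor the call target is known, but once the type parameter is instantiated both are fixed, which is exactly what a post-mono MIR pass could exploit.

```rust
// Toy illustration of "polymorphic vs. monomorphized" visibility.
trait Runner {
    const ENABLED: bool;
    fn run(&self) -> u32;
}

struct Fast;
impl Runner for Fast {
    const ENABLED: bool = true;
    fn run(&self) -> u32 {
        1
    }
}

// Polymorphic view: `T::ENABLED` and the target of `r.run()` are still generic,
// so a pre-mono pass cannot fold the branch or resolve the call.
fn generic_driver<T: Runner>(r: &T) -> u32 {
    if T::ENABLED { r.run() } else { 0 }
}

fn main() {
    // Post-mono view: with `T = Fast`, the branch is `if true` and the call
    // resolves to `<Fast as Runner>::run`, so a post-mono pass could fold both.
    println!("{}", generic_driver(&Fast));
}
```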

50 files changed: +596 -798 lines

Cargo.lock

-1 line

@@ -4135,7 +4135,6 @@ dependencies = [
 name = "rustc_monomorphize"
 version = "0.0.0"
 dependencies = [
- "rustc_abi",
  "rustc_ast",
  "rustc_attr_parsing",
  "rustc_data_structures",

compiler/rustc_codegen_cranelift/src/base.rs

+1 -10 lines

@@ -40,7 +40,7 @@ pub(crate) fn codegen_fn<'tcx>(
     let symbol_name = tcx.symbol_name(instance).name.to_string();
     let _timer = tcx.prof.generic_activity_with_arg("codegen fn", &*symbol_name);
 
-    let mir = tcx.instance_mir(instance.def);
+    let mir = tcx.codegen_mir(instance);
     let _mir_guard = crate::PrintOnPanic(|| {
         let mut buf = Vec::new();
         with_no_trimmed_paths!({
@@ -270,19 +270,10 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
         .generic_activity("codegen prelude")
         .run(|| crate::abi::codegen_fn_prelude(fx, start_block));
 
-    let reachable_blocks = traversal::mono_reachable_as_bitset(fx.mir, fx.tcx, fx.instance);
-
     for (bb, bb_data) in fx.mir.basic_blocks.iter_enumerated() {
         let block = fx.get_block(bb);
         fx.bcx.switch_to_block(block);
 
-        if !reachable_blocks.contains(bb) {
-            // We want to skip this block, because it's not reachable. But we still create
-            // the block so terminators in other blocks can reference it.
-            fx.bcx.ins().trap(TrapCode::user(1 /* unreachable */).unwrap());
-            continue;
-        }
-
         if bb_data.is_cleanup {
             // Unwinding after panicking is not supported
             continue;

compiler/rustc_codegen_ssa/src/base.rs

+1 -1 lines

@@ -416,7 +416,7 @@ pub(crate) fn codegen_instance<'a, 'tcx: 'a, Bx: BuilderMethods<'a, 'tcx>>(
     // release builds.
     info!("codegen_instance({})", instance);
 
-    mir::codegen_mir::<Bx>(cx, instance);
+    mir::lower_mir::<Bx>(cx, instance);
 }
 
 pub fn codegen_global_asm<'tcx, Cx>(cx: &mut Cx, item_id: ItemId)

compiler/rustc_codegen_ssa/src/mir/block.rs

-10 lines

@@ -1317,16 +1317,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         }
     }
 
-    pub(crate) fn codegen_block_as_unreachable(&mut self, bb: mir::BasicBlock) {
-        let llbb = match self.try_llbb(bb) {
-            Some(llbb) => llbb,
-            None => return,
-        };
-        let bx = &mut Bx::build(self.cx, llbb);
-        debug!("codegen_block_as_unreachable({:?})", bb);
-        bx.unreachable();
-    }
-
     fn codegen_terminator(
         &mut self,
         bx: &mut Bx,

compiler/rustc_codegen_ssa/src/mir/mod.rs

+5 -20 lines

@@ -126,12 +126,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
     where
         T: Copy + TypeFoldable<TyCtxt<'tcx>>,
     {
-        debug!("monomorphize: self.instance={:?}", self.instance);
-        self.instance.instantiate_mir_and_normalize_erasing_regions(
-            self.cx.tcx(),
-            self.cx.typing_env(),
-            ty::EarlyBinder::bind(value),
-        )
+        value
     }
 }
 
@@ -164,7 +159,7 @@ impl<'tcx, V: CodegenObject> LocalRef<'tcx, V> {
 ///////////////////////////////////////////////////////////////////////////
 
 #[instrument(level = "debug", skip(cx))]
-pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
+pub fn lower_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     cx: &'a Bx::CodegenCx,
     instance: Instance<'tcx>,
 ) {
@@ -173,7 +168,7 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     let tcx = cx.tcx();
     let llfn = cx.get_fn(instance);
 
-    let mut mir = tcx.instance_mir(instance.def);
+    let mut mir = tcx.codegen_mir(instance);
 
     let fn_abi = cx.fn_abi_of_instance(instance, ty::List::empty());
     debug!("fn_abi: {:?}", fn_abi);
@@ -238,7 +233,8 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         fx.compute_per_local_var_debug_info(&mut start_bx).unzip();
     fx.per_local_var_debug_info = per_local_var_debug_info;
 
-    let traversal_order = traversal::mono_reachable_reverse_postorder(mir, tcx, instance);
+    let traversal_order: Vec<_> =
+        traversal::reverse_postorder(mir).map(|(block, _data)| block).collect();
     let memory_locals = analyze::non_ssa_locals(&fx, &traversal_order);
 
     // Allocate variable and temp allocas
@@ -298,20 +294,9 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     // So drop the builder of `start_llbb` to avoid having two at the same time.
     drop(start_bx);
 
-    let mut unreached_blocks = DenseBitSet::new_filled(mir.basic_blocks.len());
     // Codegen the body of each reachable block using our reverse postorder list.
     for bb in traversal_order {
         fx.codegen_block(bb);
-        unreached_blocks.remove(bb);
-    }
-
-    // FIXME: These empty unreachable blocks are *mostly* a waste. They are occasionally
-    // targets for a SwitchInt terminator, but the reimplementation of the mono-reachable
-    // simplification in SwitchInt lowering sometimes misses cases that
-    // mono_reachable_reverse_postorder manages to figure out.
-    // The solution is to do something like post-mono GVN. But for now we have this hack.
-    for bb in unreached_blocks.iter() {
-        fx.codegen_block_as_unreachable(bb);
     }
 }
 
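A side note on the `monomorphize` change above: once the body handed to codegen has already had its generic parameters substituted, substitution is a no-op, which is why the function can now just return `value`. A tiny standalone sketch of that idempotence (toy types, not rustc's `EarlyBinder`/`Instance` machinery):

```rust
// Toy type representation with a single generic parameter slot.
#[derive(Clone, Debug, PartialEq)]
enum Ty {
    U32,
    Bool,
    Param, // stands in for a generic parameter like `T`
}

// "Monomorphize": replace every `Param` with the concrete argument.
fn subst(ty: &Ty, arg: &Ty) -> Ty {
    match ty {
        Ty::Param => arg.clone(),
        other => other.clone(),
    }
}

fn main() {
    let polymorphic = Ty::Param;
    let concrete = subst(&polymorphic, &Ty::U32);
    assert_eq!(concrete, Ty::U32);
    // Substituting again changes nothing: the type is already monomorphic,
    // which is why the codegen-side `monomorphize` can simply return `value`.
    assert_eq!(subst(&concrete, &Ty::Bool), Ty::U32);
}
```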

compiler/rustc_middle/src/mir/basic_blocks.rs

+1 -1 lines

@@ -80,7 +80,7 @@ impl<'tcx> BasicBlocks<'tcx> {
     #[inline]
     pub fn reverse_postorder(&self) -> &[BasicBlock] {
         self.cache.reverse_postorder.get_or_init(|| {
-            let mut rpo: Vec<_> = Postorder::new(&self.basic_blocks, START_BLOCK, None).collect();
+            let mut rpo: Vec<_> = Postorder::new(&self.basic_blocks, START_BLOCK).collect();
             rpo.reverse();
             rpo
         })
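For readers unfamiliar with the cached traversal being simplified here, this is a minimal sketch (plain Rust over a toy CFG of `usize` block indices, not rustc's `Postorder` iterator) of how a reverse postorder is computed: a DFS emits each block after all of its successors, and reversing that list yields the order that codegen now iterates in.

```rust
// Compute reverse postorder of a toy CFG given each block's successor list.
fn reverse_postorder(succs: &[Vec<usize>], start: usize) -> Vec<usize> {
    fn dfs(bb: usize, succs: &[Vec<usize>], seen: &mut Vec<bool>, out: &mut Vec<usize>) {
        if std::mem::replace(&mut seen[bb], true) {
            return; // already visited
        }
        for &s in &succs[bb] {
            dfs(s, succs, seen, out);
        }
        out.push(bb); // postorder: a block is emitted after all its successors
    }
    let mut seen = vec![false; succs.len()];
    let mut order = Vec::new();
    dfs(start, succs, &mut seen, &mut order);
    order.reverse(); // reverse postorder: visit a block before its successors (in a DAG)
    order
}

fn main() {
    // CFG: 0 -> {1, 2}, 1 -> {3}, 2 -> {3}, 3 -> {}
    let succs = vec![vec![1, 2], vec![3], vec![3], vec![]];
    println!("{:?}", reverse_postorder(&succs, 0)); // prints [0, 2, 1, 3]
}
```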

compiler/rustc_middle/src/mir/mod.rs

+2 -83 lines

@@ -33,8 +33,8 @@ use crate::mir::interpret::{AllocRange, Scalar};
 use crate::ty::codec::{TyDecoder, TyEncoder};
 use crate::ty::print::{FmtPrinter, Printer, pretty_print_const, with_no_trimmed_paths};
 use crate::ty::{
-    self, GenericArg, GenericArgsRef, Instance, InstanceKind, List, Ty, TyCtxt, TypeVisitableExt,
-    TypingEnv, UserTypeAnnotationIndex,
+    self, GenericArg, GenericArgsRef, InstanceKind, List, Ty, TyCtxt, TypeVisitableExt, TypingEnv,
+    UserTypeAnnotationIndex,
 };
 
 mod basic_blocks;
@@ -633,74 +633,6 @@ impl<'tcx> Body<'tcx> {
         self.injection_phase.is_some()
     }
 
-    /// If this basic block ends with a [`TerminatorKind::SwitchInt`] for which we can evaluate the
-    /// discriminant in monomorphization, we return the discriminant bits and the
-    /// [`SwitchTargets`], just so the caller doesn't also have to match on the terminator.
-    fn try_const_mono_switchint<'a>(
-        tcx: TyCtxt<'tcx>,
-        instance: Instance<'tcx>,
-        block: &'a BasicBlockData<'tcx>,
-    ) -> Option<(u128, &'a SwitchTargets)> {
-        // There are two places here we need to evaluate a constant.
-        let eval_mono_const = |constant: &ConstOperand<'tcx>| {
-            // FIXME(#132279): what is this, why are we using an empty environment here.
-            let typing_env = ty::TypingEnv::fully_monomorphized();
-            let mono_literal = instance.instantiate_mir_and_normalize_erasing_regions(
-                tcx,
-                typing_env,
-                crate::ty::EarlyBinder::bind(constant.const_),
-            );
-            mono_literal.try_eval_bits(tcx, typing_env)
-        };
-
-        let TerminatorKind::SwitchInt { discr, targets } = &block.terminator().kind else {
-            return None;
-        };
-
-        // If this is a SwitchInt(const _), then we can just evaluate the constant and return.
-        let discr = match discr {
-            Operand::Constant(constant) => {
-                let bits = eval_mono_const(constant)?;
-                return Some((bits, targets));
-            }
-            Operand::Move(place) | Operand::Copy(place) => place,
-        };
-
-        // MIR for `if false` actually looks like this:
-        // _1 = const _
-        // SwitchInt(_1)
-        //
-        // And MIR for if intrinsics::ub_checks() looks like this:
-        // _1 = UbChecks()
-        // SwitchInt(_1)
-        //
-        // So we're going to try to recognize this pattern.
-        //
-        // If we have a SwitchInt on a non-const place, we find the most recent statement that
-        // isn't a storage marker. If that statement is an assignment of a const to our
-        // discriminant place, we evaluate and return the const, as if we've const-propagated it
-        // into the SwitchInt.
-
-        let last_stmt = block.statements.iter().rev().find(|stmt| {
-            !matches!(stmt.kind, StatementKind::StorageDead(_) | StatementKind::StorageLive(_))
-        })?;
-
-        let (place, rvalue) = last_stmt.kind.as_assign()?;
-
-        if discr != place {
-            return None;
-        }
-
-        match rvalue {
-            Rvalue::NullaryOp(NullOp::UbChecks, _) => Some((tcx.sess.ub_checks() as u128, targets)),
-            Rvalue::Use(Operand::Constant(constant)) => {
-                let bits = eval_mono_const(constant)?;
-                Some((bits, targets))
-            }
-            _ => None,
-        }
-    }
-
     /// For a `Location` in this scope, determine what the "caller location" at that point is. This
     /// is interesting because of inlining: the `#[track_caller]` attribute of inlined functions
     /// must be honored. Falls back to the `tracked_caller` value for `#[track_caller]` functions,
@@ -1381,19 +1313,6 @@ impl<'tcx> BasicBlockData<'tcx> {
     pub fn is_empty_unreachable(&self) -> bool {
        self.statements.is_empty() && matches!(self.terminator().kind, TerminatorKind::Unreachable)
     }
-
-    /// Like [`Terminator::successors`] but tries to use information available from the [`Instance`]
-    /// to skip successors like the `false` side of an `if const {`.
-    ///
-    /// This is used to implement [`traversal::mono_reachable`] and
-    /// [`traversal::mono_reachable_reverse_postorder`].
-    pub fn mono_successors(&self, tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Successors<'_> {
-        if let Some((bits, targets)) = Body::try_const_mono_switchint(tcx, instance, self) {
-            targets.successors_for_value(bits)
-        } else {
-            self.terminator().successors()
-        }
-    }
 }
 
 ///////////////////////////////////////////////////////////////////////////
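The deleted `try_const_mono_switchint` encodes a simple idea: if the discriminant of a `SwitchInt` is a compile-time constant, only one target is reachable and the rest can be skipped. Here is a minimal standalone model of that idea (toy types, not rustc's `Operand`/`SwitchTargets`), for context on what the mono-reachable machinery was folding:

```rust
// Toy model: a switch discriminant that may or may not be a known constant.
#[derive(Clone, Copy)]
enum Discr {
    Const(u128),
    Unknown,
}

// Toy switch targets: `values[i]` jumps to `targets[i]`, anything else to `otherwise`.
struct SwitchTargets {
    values: Vec<u128>,
    targets: Vec<usize>,
    otherwise: usize,
}

impl SwitchTargets {
    fn target_for_value(&self, bits: u128) -> usize {
        self.values
            .iter()
            .position(|&v| v == bits)
            .map_or(self.otherwise, |i| self.targets[i])
    }
}

/// Returns the single reachable successor if the discriminant is constant.
fn const_fold_switch(discr: Discr, targets: &SwitchTargets) -> Option<usize> {
    match discr {
        Discr::Const(bits) => Some(targets.target_for_value(bits)),
        Discr::Unknown => None,
    }
}

fn main() {
    let targets = SwitchTargets { values: vec![0], targets: vec![10], otherwise: 20 };
    assert_eq!(const_fold_switch(Discr::Const(0), &targets), Some(10));
    assert_eq!(const_fold_switch(Discr::Const(1), &targets), Some(20));
    assert_eq!(const_fold_switch(Discr::Unknown, &targets), None);
    println!("ok");
}
```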

compiler/rustc_middle/src/mir/traversal.rs

+1 -111 lines

@@ -98,21 +98,17 @@ pub struct Postorder<'a, 'tcx> {
     basic_blocks: &'a IndexSlice<BasicBlock, BasicBlockData<'tcx>>,
     visited: DenseBitSet<BasicBlock>,
     visit_stack: Vec<(BasicBlock, Successors<'a>)>,
-    /// A non-empty `extra` allows for a precise calculation of the successors.
-    extra: Option<(TyCtxt<'tcx>, Instance<'tcx>)>,
 }
 
 impl<'a, 'tcx> Postorder<'a, 'tcx> {
     pub fn new(
         basic_blocks: &'a IndexSlice<BasicBlock, BasicBlockData<'tcx>>,
         root: BasicBlock,
-        extra: Option<(TyCtxt<'tcx>, Instance<'tcx>)>,
     ) -> Postorder<'a, 'tcx> {
         let mut po = Postorder {
             basic_blocks,
             visited: DenseBitSet::new_empty(basic_blocks.len()),
             visit_stack: Vec::new(),
-            extra,
         };
 
         po.visit(root);
@@ -126,11 +122,7 @@ impl<'a, 'tcx> Postorder<'a, 'tcx> {
             return;
         }
         let data = &self.basic_blocks[bb];
-        let successors = if let Some(extra) = self.extra {
-            data.mono_successors(extra.0, extra.1)
-        } else {
-            data.terminator().successors()
-        };
+        let successors = data.terminator().successors();
         self.visit_stack.push((bb, successors));
     }
 
@@ -225,20 +217,6 @@ pub fn postorder<'a, 'tcx>(
     reverse_postorder(body).rev()
 }
 
-pub fn mono_reachable_reverse_postorder<'a, 'tcx>(
-    body: &'a Body<'tcx>,
-    tcx: TyCtxt<'tcx>,
-    instance: Instance<'tcx>,
-) -> Vec<BasicBlock> {
-    let mut iter = Postorder::new(&body.basic_blocks, START_BLOCK, Some((tcx, instance)));
-    let mut items = Vec::with_capacity(body.basic_blocks.len());
-    while let Some(block) = iter.next() {
-        items.push(block);
-    }
-    items.reverse();
-    items
-}
-
 /// Returns an iterator over all basic blocks reachable from the `START_BLOCK` in no particular
 /// order.
 ///
@@ -286,91 +264,3 @@ pub fn reverse_postorder<'a, 'tcx>(
 {
     body.basic_blocks.reverse_postorder().iter().map(|&bb| (bb, &body.basic_blocks[bb]))
 }
-
-/// Traversal of a [`Body`] that tries to avoid unreachable blocks in a monomorphized [`Instance`].
-///
-/// This is allowed to have false positives; blocks may be visited even if they are not actually
-/// reachable.
-///
-/// Such a traversal is mostly useful because it lets us skip lowering the `false` side
-/// of `if <T as Trait>::CONST`, as well as [`NullOp::UbChecks`].
-///
-/// [`NullOp::UbChecks`]: rustc_middle::mir::NullOp::UbChecks
-pub fn mono_reachable<'a, 'tcx>(
-    body: &'a Body<'tcx>,
-    tcx: TyCtxt<'tcx>,
-    instance: Instance<'tcx>,
-) -> MonoReachable<'a, 'tcx> {
-    MonoReachable::new(body, tcx, instance)
-}
-
-/// [`MonoReachable`] internally accumulates a [`DenseBitSet`] of visited blocks. This is just a
-/// convenience function to run that traversal then extract its set of reached blocks.
-pub fn mono_reachable_as_bitset<'a, 'tcx>(
-    body: &'a Body<'tcx>,
-    tcx: TyCtxt<'tcx>,
-    instance: Instance<'tcx>,
-) -> DenseBitSet<BasicBlock> {
-    let mut iter = mono_reachable(body, tcx, instance);
-    while let Some(_) = iter.next() {}
-    iter.visited
-}
-
-pub struct MonoReachable<'a, 'tcx> {
-    body: &'a Body<'tcx>,
-    tcx: TyCtxt<'tcx>,
-    instance: Instance<'tcx>,
-    visited: DenseBitSet<BasicBlock>,
-    // Other traversers track their worklist in a Vec. But we don't care about order, so we can
-    // store ours in a DenseBitSet and thus save allocations because DenseBitSet has a small size
-    // optimization.
-    worklist: DenseBitSet<BasicBlock>,
-}
-
-impl<'a, 'tcx> MonoReachable<'a, 'tcx> {
-    pub fn new(
-        body: &'a Body<'tcx>,
-        tcx: TyCtxt<'tcx>,
-        instance: Instance<'tcx>,
-    ) -> MonoReachable<'a, 'tcx> {
-        let mut worklist = DenseBitSet::new_empty(body.basic_blocks.len());
-        worklist.insert(START_BLOCK);
-        MonoReachable {
-            body,
-            tcx,
-            instance,
-            visited: DenseBitSet::new_empty(body.basic_blocks.len()),
-            worklist,
-        }
-    }
-
-    fn add_work(&mut self, blocks: impl IntoIterator<Item = BasicBlock>) {
-        for block in blocks.into_iter() {
-            if !self.visited.contains(block) {
-                self.worklist.insert(block);
-            }
-        }
-    }
-}
-
-impl<'a, 'tcx> Iterator for MonoReachable<'a, 'tcx> {
-    type Item = (BasicBlock, &'a BasicBlockData<'tcx>);
-
-    fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> {
-        while let Some(idx) = self.worklist.iter().next() {
-            self.worklist.remove(idx);
-            if !self.visited.insert(idx) {
-                continue;
-            }
-
-            let data = &self.body[idx];
-
-            let targets = data.mono_successors(self.tcx, self.instance);
-            self.add_work(targets);
-
-            return Some((idx, data));
-        }
-
-        None
-    }
-}
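The removed `MonoReachable` iterator is, at its core, a worklist-based reachability computation. For context, a minimal sketch of the same shape (plain Rust, with a `Vec<bool>` standing in for `DenseBitSet`); the intent of this PR is that post-mono MIR passes simplify such unreachable blocks away before codegen, so the ordinary cached reverse postorder suffices.

```rust
// Worklist reachability over a toy CFG given each block's successor list.
fn reachable_blocks(succs: &[Vec<usize>], start: usize) -> Vec<bool> {
    let mut visited = vec![false; succs.len()];
    let mut worklist = vec![start];
    while let Some(bb) = worklist.pop() {
        if std::mem::replace(&mut visited[bb], true) {
            continue; // already handled
        }
        for &s in &succs[bb] {
            if !visited[s] {
                worklist.push(s);
            }
        }
    }
    visited
}

fn main() {
    // CFG: 0 -> {2}, 1 -> {2} (block 1 is unreachable from 0), 2 -> {}
    let succs = vec![vec![2], vec![2], vec![]];
    assert_eq!(reachable_blocks(&succs, 0), vec![true, false, true]);
    println!("ok");
}
```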
