Skip to content

Fully destructure slice and array constants into patterns #77390

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
273 changes: 8 additions & 265 deletions compiler/rustc_mir_build/src/thir/pattern/_match.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,10 +284,9 @@ use super::{FieldPat, Pat, PatKind, PatRange};

use rustc_arena::TypedArena;
use rustc_attr::{SignedInt, UnsignedInt};
use rustc_errors::ErrorReported;
use rustc_hir::def_id::DefId;
use rustc_hir::{HirId, RangeEnd};
use rustc_middle::mir::interpret::{truncate, AllocId, ConstValue, Pointer, Scalar};
use rustc_middle::mir::interpret::{truncate, ConstValue};
use rustc_middle::mir::Field;
use rustc_middle::ty::layout::IntegerExt;
use rustc_middle::ty::{self, Const, Ty, TyCtxt};
Expand All @@ -296,108 +295,21 @@ use rustc_span::{Span, DUMMY_SP};
use rustc_target::abi::{Integer, Size, VariantIdx};

use smallvec::{smallvec, SmallVec};
use std::borrow::Cow;
use std::cmp::{self, max, min, Ordering};
use std::convert::TryInto;
use std::fmt;
use std::iter::{FromIterator, IntoIterator};
use std::ops::RangeInclusive;

crate fn expand_pattern<'a, 'tcx>(cx: &MatchCheckCtxt<'a, 'tcx>, pat: Pat<'tcx>) -> Pat<'tcx> {
LiteralExpander { tcx: cx.tcx, param_env: cx.param_env }.fold_pattern(&pat)
crate fn expand_pattern<'tcx>(pat: Pat<'tcx>) -> Pat<'tcx> {
LiteralExpander.fold_pattern(&pat)
}

struct LiteralExpander<'tcx> {
tcx: TyCtxt<'tcx>,
param_env: ty::ParamEnv<'tcx>,
}
struct LiteralExpander;

impl<'tcx> LiteralExpander<'tcx> {
/// Derefs `val` and potentially unsizes the value if `crty` is an array and `rty` a slice.
///
/// `crty` and `rty` can differ because you can use array constants in the presence of slice
/// patterns. So the pattern may end up being a slice, but the constant is an array. We convert
/// the array to a slice in that case.
fn fold_const_value_deref(
&mut self,
val: ConstValue<'tcx>,
// the pattern's pointee type
rty: Ty<'tcx>,
// the constant's pointee type
crty: Ty<'tcx>,
) -> ConstValue<'tcx> {
debug!("fold_const_value_deref {:?} {:?} {:?}", val, rty, crty);
match (val, &crty.kind(), &rty.kind()) {
// the easy case, deref a reference
(ConstValue::Scalar(p), x, y) if x == y => {
match p {
Scalar::Ptr(p) => {
let alloc = self.tcx.global_alloc(p.alloc_id).unwrap_memory();
ConstValue::ByRef { alloc, offset: p.offset }
}
Scalar::Raw { .. } => {
let layout = self.tcx.layout_of(self.param_env.and(rty)).unwrap();
if layout.is_zst() {
// Deref of a reference to a ZST is a nop.
ConstValue::Scalar(Scalar::zst())
} else {
// FIXME(oli-obk): this is reachable for `const FOO: &&&u32 = &&&42;`
bug!("cannot deref {:#?}, {} -> {}", val, crty, rty);
}
}
}
}
// unsize array to slice if pattern is array but match value or other patterns are slice
(ConstValue::Scalar(Scalar::Ptr(p)), ty::Array(t, n), ty::Slice(u)) => {
assert_eq!(t, u);
ConstValue::Slice {
data: self.tcx.global_alloc(p.alloc_id).unwrap_memory(),
start: p.offset.bytes().try_into().unwrap(),
end: n.eval_usize(self.tcx, ty::ParamEnv::empty()).try_into().unwrap(),
}
}
// fat pointers stay the same
(ConstValue::Slice { .. }, _, _)
| (_, ty::Slice(_), ty::Slice(_))
| (_, ty::Str, ty::Str) => val,
// FIXME(oli-obk): this is reachable for `const FOO: &&&u32 = &&&42;` being used
_ => bug!("cannot deref {:#?}, {} -> {}", val, crty, rty),
}
}
}

impl<'tcx> PatternFolder<'tcx> for LiteralExpander<'tcx> {
impl<'tcx> PatternFolder<'tcx> for LiteralExpander {
fn fold_pattern(&mut self, pat: &Pat<'tcx>) -> Pat<'tcx> {
debug!("fold_pattern {:?} {:?} {:?}", pat, pat.ty.kind(), pat.kind);
match (pat.ty.kind(), &*pat.kind) {
(&ty::Ref(_, rty, _), &PatKind::Constant { value: Const { val, ty: const_ty } })
if const_ty.is_ref() =>
{
let crty =
if let ty::Ref(_, crty, _) = const_ty.kind() { crty } else { unreachable!() };
if let ty::ConstKind::Value(val) = val {
Pat {
ty: pat.ty,
span: pat.span,
kind: box PatKind::Deref {
subpattern: Pat {
ty: rty,
span: pat.span,
kind: box PatKind::Constant {
value: Const::from_value(
self.tcx,
self.fold_const_value_deref(*val, rty, crty),
rty,
),
},
},
},
}
} else {
bug!("cannot deref {:#?}, {} -> {}", val, crty, rty)
}
}

(_, &PatKind::Binding { subpattern: Some(ref s), .. }) => s.fold_with(self),
(_, &PatKind::AscribeUserType { subpattern: ref s, .. }) => s.fold_with(self),
_ => pat.super_fold_with(self),
Expand Down Expand Up @@ -965,8 +877,6 @@ impl<'tcx> Constructor<'tcx> {
.iter()
.filter_map(|c: &Constructor<'_>| match c {
Slice(slice) => Some(*slice),
// FIXME(oli-obk): implement `deref` for `ConstValue`
ConstantValue(..) => None,
_ => bug!("bad slice pattern constructor {:?}", c),
})
.map(Slice::value_kind);
Expand Down Expand Up @@ -1186,12 +1096,6 @@ impl<'p, 'tcx> Fields<'p, 'tcx> {
Fields::Slice(std::slice::from_ref(pat))
}

/// Construct a new `Fields` from the given patterns. You must be sure those patterns can't
/// contain fields that need to be filtered out. When in doubt, prefer `replace_fields`.
fn from_slice_unfiltered(pats: &'p [Pat<'tcx>]) -> Self {
Fields::Slice(pats)
}

/// Convenience; internal use.
fn wildcards_from_tys(
cx: &MatchCheckCtxt<'p, 'tcx>,
Expand Down Expand Up @@ -2207,19 +2111,7 @@ fn pat_constructor<'tcx>(
if let Some(int_range) = IntRange::from_const(tcx, param_env, value, pat.span) {
Some(IntRange(int_range))
} else {
match (value.val, &value.ty.kind()) {
(_, ty::Array(_, n)) => {
let len = n.eval_usize(tcx, param_env);
Some(Slice(Slice { array_len: Some(len), kind: FixedLen(len) }))
}
(ty::ConstKind::Value(ConstValue::Slice { start, end, .. }), ty::Slice(_)) => {
let len = (end - start) as u64;
Some(Slice(Slice { array_len: None, kind: FixedLen(len) }))
}
// FIXME(oli-obk): implement `deref` for `ConstValue`
// (ty::ConstKind::Value(ConstValue::ByRef { .. }), ty::Slice(_)) => { ... }
_ => Some(ConstantValue(value)),
}
Some(ConstantValue(value))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this related to exhaustiveness checking? Shouldn't opaque consts be treated as matching nothing for that as they might not be structural-match?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is, very good catch. We still need a variant for string slices here, as string slices are not opaque, but ConstantValue is gone now and the replacement Str only handles &str constants. There are further improvements we can do here now that everything is explicit, but I'd rather not touch compare_const_vals in this PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should note that I understand basically nothing of this code, if I caught anything that was an accident.^^

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also if the last 2 comments are bugfixes, is it possible to add a test?

Copy link
Contributor Author

@oli-obk oli-obk Oct 5, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was no bugfix, no behaviour was changed, all I did was to move the bail-out (that was guaranteed to happen later) to an earlier site so the rest of the code that I was able to remove became dead code. This PR generally is mostly just removing dead code that became obsolete after destructuring all structural-eq constants into (non-const) patterns. This means except for &str, all PatKind::Const are now opaque.

Edit: they were behaving opaquely before already, but there was lots of code making sure we didn't run into ICEs around them by bailing out as "not contributing to exhaustiveness".

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Maybe add some doc comments to PatKind explaining this. :)

}
}
PatKind::Range(PatRange { lo, hi, end }) => {
Expand Down Expand Up @@ -2254,75 +2146,6 @@ fn pat_constructor<'tcx>(
}
}

// checks whether a constant is equal to a user-written slice pattern. Only supports byte slices,
// meaning all other types will compare unequal and thus equal patterns often do not cause the
// second pattern to lint about unreachable match arms.
fn slice_pat_covered_by_const<'tcx>(
tcx: TyCtxt<'tcx>,
_span: Span,
const_val: &'tcx ty::Const<'tcx>,
prefix: &[Pat<'tcx>],
slice: &Option<Pat<'tcx>>,
suffix: &[Pat<'tcx>],
param_env: ty::ParamEnv<'tcx>,
) -> Result<bool, ErrorReported> {
let const_val_val = if let ty::ConstKind::Value(val) = const_val.val {
val
} else {
bug!(
"slice_pat_covered_by_const: {:#?}, {:#?}, {:#?}, {:#?}",
const_val,
prefix,
slice,
suffix,
)
};

let data: &[u8] = match (const_val_val, &const_val.ty.kind()) {
(ConstValue::ByRef { offset, alloc, .. }, ty::Array(t, n)) => {
assert_eq!(*t, tcx.types.u8);
let n = n.eval_usize(tcx, param_env);
let ptr = Pointer::new(AllocId(0), offset);
alloc.get_bytes(&tcx, ptr, Size::from_bytes(n)).unwrap()
}
(ConstValue::Slice { data, start, end }, ty::Slice(t)) => {
assert_eq!(*t, tcx.types.u8);
let ptr = Pointer::new(AllocId(0), Size::from_bytes(start));
data.get_bytes(&tcx, ptr, Size::from_bytes(end - start)).unwrap()
}
// FIXME(oli-obk): create a way to extract fat pointers from ByRef
(_, ty::Slice(_)) => return Ok(false),
_ => bug!(
"slice_pat_covered_by_const: {:#?}, {:#?}, {:#?}, {:#?}",
const_val,
prefix,
slice,
suffix,
),
};

let pat_len = prefix.len() + suffix.len();
if data.len() < pat_len || (slice.is_none() && data.len() > pat_len) {
return Ok(false);
}

for (ch, pat) in data[..prefix.len()]
.iter()
.zip(prefix)
.chain(data[data.len() - suffix.len()..].iter().zip(suffix))
{
if let box PatKind::Constant { value } = pat.kind {
let b = value.eval_bits(tcx, param_env, pat.ty);
assert_eq!(b as u8 as u128, b);
if b as u8 != *ch {
return Ok(false);
}
}
}

Ok(true)
}

/// For exhaustive integer matching, some constructors are grouped within other constructors
/// (namely integer typed values are grouped within ranges). However, when specialising these
/// constructors, we want to be specialising for the underlying constructors (the integers), not
Expand Down Expand Up @@ -2694,73 +2517,7 @@ fn specialize_one_pattern<'p, 'tcx>(
PatKind::Deref { ref subpattern } => Some(Fields::from_single_pattern(subpattern)),

PatKind::Constant { value } if constructor.is_slice() => {
// We extract an `Option` for the pointer because slices of zero
// elements don't necessarily point to memory, they are usually
// just integers. The only time they should be pointing to memory
// is when they are subslices of nonzero slices.
let (alloc, offset, n, ty) = match value.ty.kind() {
ty::Array(t, n) => {
let n = n.eval_usize(cx.tcx, cx.param_env);
// Shortcut for `n == 0` where no matter what `alloc` and `offset` we produce,
// the result would be exactly what we early return here.
if n == 0 {
if ctor_wild_subpatterns.len() as u64 != n {
return None;
}
return Some(Fields::empty());
}
match value.val {
ty::ConstKind::Value(ConstValue::ByRef { offset, alloc, .. }) => {
(Cow::Borrowed(alloc), offset, n, t)
}
_ => span_bug!(pat.span, "array pattern is {:?}", value,),
}
}
ty::Slice(t) => {
match value.val {
ty::ConstKind::Value(ConstValue::Slice { data, start, end }) => {
let offset = Size::from_bytes(start);
let n = (end - start) as u64;
(Cow::Borrowed(data), offset, n, t)
}
ty::ConstKind::Value(ConstValue::ByRef { .. }) => {
// FIXME(oli-obk): implement `deref` for `ConstValue`
return None;
}
_ => span_bug!(
pat.span,
"slice pattern constant must be scalar pair but is {:?}",
value,
),
}
}
_ => span_bug!(
pat.span,
"unexpected const-val {:?} with ctor {:?}",
value,
constructor,
),
};
if ctor_wild_subpatterns.len() as u64 != n {
return None;
}

// Convert a constant slice/array pattern to a list of patterns.
let layout = cx.tcx.layout_of(cx.param_env.and(ty)).ok()?;
let ptr = Pointer::new(AllocId(0), offset);
let pats = cx.pattern_arena.alloc_from_iter((0..n).filter_map(|i| {
let ptr = ptr.offset(layout.size * i, &cx.tcx).ok()?;
let scalar = alloc.read_scalar(&cx.tcx, ptr, layout.size).ok()?;
let scalar = scalar.check_init().ok()?;
let value = ty::Const::from_scalar(cx.tcx, scalar, ty);
let pattern = Pat { ty, span: pat.span, kind: box PatKind::Constant { value } };
Some(pattern)
}));
// Ensure none of the dereferences failed.
if pats.len() as u64 != n {
return None;
}
Some(Fields::from_slice_unfiltered(pats))
span_bug!(pat.span, "unexpected const-val {:?} with ctor {:?}", value, constructor)
}

PatKind::Constant { .. } | PatKind::Range { .. } => {
Expand Down Expand Up @@ -2802,21 +2559,7 @@ fn specialize_one_pattern<'p, 'tcx>(
let suffix = suffix.iter().enumerate().map(|(i, p)| (arity - suffix.len() + i, p));
Some(ctor_wild_subpatterns.replace_fields_indexed(prefix.chain(suffix)))
}
ConstantValue(cv) => {
match slice_pat_covered_by_const(
cx.tcx,
pat.span,
cv,
prefix,
slice,
suffix,
cx.param_env,
) {
Ok(true) => Some(Fields::empty()),
Ok(false) => None,
Err(ErrorReported) => None,
}
}
ConstantValue(_) => None,
_ => span_bug!(pat.span, "unexpected ctor {:?} for slice pat", constructor),
},

Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_mir_build/src/thir/pattern/check_match.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ impl<'tcx> MatchVisitor<'_, 'tcx> {
patcx.include_lint_checks();
let pattern = patcx.lower_pattern(pat);
let pattern_ty = pattern.ty;
let pattern: &_ = cx.pattern_arena.alloc(expand_pattern(cx, pattern));
let pattern: &_ = cx.pattern_arena.alloc(expand_pattern(pattern));
if !patcx.errors.is_empty() {
*have_errors = true;
patcx.report_inlining_errors(pat.span);
Expand Down
12 changes: 7 additions & 5 deletions compiler/rustc_mir_build/src/thir/pattern/const_to_pat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,14 +387,16 @@ impl<'a, 'tcx> ConstToPat<'a, 'tcx> {
// `&str` and `&[u8]` are represented as `ConstValue::Slice`, let's keep using this
// optimization for now.
ty::Str => PatKind::Constant { value: cv },
ty::Slice(elem_ty) if elem_ty == tcx.types.u8 => PatKind::Constant { value: cv },
// `b"foo"` produces a `&[u8; 3]`, but you can't use constants of array type when
// matching against references, you can only use byte string literals.
// FIXME: clean this up, likely by permitting array patterns when matching on slices
ty::Array(elem_ty, _) if elem_ty == tcx.types.u8 => PatKind::Constant { value: cv },
// The typechecker has a special case for byte string literals, by treating them
// as slices. This means we turn `&[T; N]` constants into slice patterns, which
// has no negative effects on pattern matching, even if we're actually matching on
// arrays.
ty::Array(..) |
// Cannot merge this with the catch all branch below, because the `const_deref`
// changes the type from slice to array, and slice patterns behave differently from
// array patterns.
// changes the type from slice to array, we need to keep the original type in the
// pattern.
ty::Slice(..) => {
let old = self.behind_reference.replace(true);
let array = tcx.deref_const(self.param_env.and(cv));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ LL | match buf {
= help: ensure that all possible cases are being handled, possibly by adding wildcards or more match arms
= note: the matched value is of type `&[u8; 4]`

error[E0004]: non-exhaustive patterns: `&[]`, `&[_]`, `&[_, _]` and 2 more not covered
error[E0004]: non-exhaustive patterns: `&[0_u8..=64_u8, _, _, _]` and `&[66_u8..=u8::MAX, _, _, _]` not covered
--> $DIR/match-byte-array-patterns-2.rs:10:11
|
LL | match buf {
| ^^^ patterns `&[]`, `&[_]`, `&[_, _]` and 2 more not covered
| ^^^ patterns `&[0_u8..=64_u8, _, _, _]` and `&[66_u8..=u8::MAX, _, _, _]` not covered
|
= help: ensure that all possible cases are being handled, possibly by adding wildcards or more match arms
= note: the matched value is of type `&[u8]`
Expand Down